Added new merging script and updated build scripts accordingly

Recent commits in the PG17 code added additional API changes,
making the "single src directory with ifdefs" approach impractical.

This commit adds a new Python-based script (documented with comments
in the file) to help with version-specific merges. The copied
heap files now reside in srcXX directories, where XX is the version.
pull/209/head
Zsolt Parragi 1 year ago
parent f9ed3ce2ca
commit 6cbd7c879a
  1. 3
      .gitignore
  2. 18
      Makefile.in
  3. 23
      meson.build
  4. 409
      tools/heap_merge.sh
  5. 25
      tools/repl.sed
  6. 198
      tools/tool.py

3
.gitignore vendored

@ -1,9 +1,10 @@
*.so
*.o
__pycache__
/config.cache
/config.log
/config.status
/Makefile
/autom4te.cache
/configure~
/configure~

@ -24,16 +24,16 @@ TAP_TESTS = 1
OBJS = src/encryption/enc_tde.o \
src/encryption/enc_aes.o \
src/access/pg_tde_slot.o \
src/access/pg_tde_io.o \
src/access/pg_tdeam_visibility.o \
src/access/pg_tde_tdemap.o \
src/access/pg_tdeam.o \
src/access/pg_tdetoast.o \
src/access/pg_tde_prune.o \
src/access/pg_tde_vacuumlazy.o \
src/access/pg_tde_visibilitymap.o \
src/access/pg_tde_rewrite.o \
src/access/pg_tdeam_handler.o \
src$(MAJORVERSION)/access/pg_tde_io.o \
src$(MAJORVERSION)/access/pg_tdeam_visibility.o \
src$(MAJORVERSION)/access/pg_tdeam.o \
src$(MAJORVERSION)/access/pg_tdetoast.o \
src$(MAJORVERSION)/access/pg_tde_prune.o \
src$(MAJORVERSION)/access/pg_tde_vacuumlazy.o \
src$(MAJORVERSION)/access/pg_tde_visibilitymap.o \
src$(MAJORVERSION)/access/pg_tde_rewrite.o \
src$(MAJORVERSION)/access/pg_tdeam_handler.o \
src/access/pg_tde_ddl.o \
src/access/pg_tde_xlog.o \
src/transam/pg_tde_xact_handler.o \

@ -13,20 +13,23 @@ conf_data.set_quoted('PACKAGE_TARNAME', 'pg_tde')
configure_file(output : 'config.h',
configuration : conf_data)
pg_version = meson.project_version().substring(0,2)
src_version = 'src' + pg_version
pg_tde_sources = files(
'src/pg_tde.c',
'src/transam/pg_tde_xact_handler.c',
'src/access/pg_tde_tdemap.c',
'src/access/pg_tde_slot.c',
'src/access/pg_tdeam.c',
'src/access/pg_tdeam_handler.c',
'src/access/pg_tdeam_visibility.c',
'src/access/pg_tdetoast.c',
'src/access/pg_tde_io.c',
'src/access/pg_tde_prune.c',
'src/access/pg_tde_rewrite.c',
'src/access/pg_tde_vacuumlazy.c',
'src/access/pg_tde_visibilitymap.c',
src_version / 'access/pg_tdeam.c',
src_version / 'access/pg_tdeam_handler.c',
src_version / 'access/pg_tdeam_visibility.c',
src_version / 'access/pg_tdetoast.c',
src_version / 'access/pg_tde_io.c',
src_version / 'access/pg_tde_prune.c',
src_version / 'access/pg_tde_rewrite.c',
src_version / 'access/pg_tde_vacuumlazy.c',
src_version / 'access/pg_tde_visibilitymap.c',
'src/access/pg_tde_ddl.c',
'src/access/pg_tde_xlog.c',
@ -51,7 +54,7 @@ pg_tde_sources = files(
'src/pg_tde_event_capture.c',
)
incdir = include_directories('src/include', '.')
incdir = include_directories(src_version / 'include', 'src/include', '.')
deps_update = {'dependencies': contrib_mod_args.get('dependencies') + [curldep]}

@ -1,409 +0,0 @@
#!/bin/bash
# SCRIPT: patch_generator.sh
#-----------------------------
# This script generates patch between two PG commits and applies it to
# the TDE extension source.
# A pipeline fails if any of its commands fails; apply_file_patch relies on
# this when it inspects the status of its "find | xargs | sh" pipelines.
set -o pipefail

## GLOBAL VARIABLES
# Values already present in the environment are kept, so the script can be
# configured through the environment as well as through its options.
export TDE="tde"
export SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
# NOTE(review): GNU mktemp expects X's in a -t template - confirm this call on the target platform.
export WORKING_DIR="${WORKING_DIR:-$(mktemp -d -t $TDE)}"
export TDE_DIR="${WORKING_DIR}/tde"
export USER_TDE_DIR=""
export PG_COMMIT_BASE="${PG_COMMIT_BASE}"
# Fix: this used to be initialised from PG_COMMIT_BASE, which silently
# discarded a PG_COMMIT_LATEST supplied through the environment.
export PG_COMMIT_LATEST="${PG_COMMIT_LATEST}"
export TDE_COMMIT="${TDE_COMMIT}"
export FILES_BASE_DIR="pg_base"
export FILES_LATEST_DIR="pg_latest"
export FILES_PATCH_DIR="pg_patches"
# Non-empty means "dry run only"; the -a option clears it.
export TDE_DRY_RUN="--dry-run"
export APPLY_PATCHES_FORCE=0

# Script variables
total_patches=0
total_patches_failed=0
# Flat list of (patch file, TDE file) pairs for patches that failed the dry run.
declare -a patch_list_unclean=()
# The pg_* and tde_* arrays are parallel: index i of one maps to index i of the other.
declare -a pg_header_file_map=("visibilitymap.h" "rewriteheap.h" "heapam_xlog.h" "hio.h" "heapam.h" "heaptoast.h")
declare -a tde_header_file_map=("pg_tde_visibilitymap.h" "pg_tde_rewrite.h" "pg_tdeam_xlog.h" "pg_tde_io.h" "pg_tdeam.h" "pg_tdetoast.h")
declare -a pg_c_file_map=("heapam.c" "heapam_handler.c" "heapam_visibility.c" "heaptoast.c" "hio.c" "pruneheap.c" "rewriteheap.c" "vacuumlazy.c" "visibilitymap.c")
declare -a tde_c_file_map=("pg_tdeam.c" "pg_tdeam_handler.c" "pg_tdeam_visibility.c" "pg_tdetoast.c" "pg_tde_io.c" "pg_tde_prune.c" "pg_tde_rewrite.c" "pg_tde_vacuumlazy.c" "pg_tde_visibilitymap.c")
## USAGE
# Print the help text to stdout.
#   $1 - optional error code (default 0). When it is non-zero the script is
#        terminated through exit_script with that code after printing;
#        otherwise control returns to the caller.
usage()
{
errorCode=${1:-0}
# The heredoc delimiter is unquoted, so $0, $TDE and $WORKING_DIR below are
# expanded when the text is printed.
cat << EOF
usage: $0 OPTIONS
This script generates file-wise patches between two PG commits and applies it to
the TDE extension source.
By default, it only performs a dry run of the patch application. See the usage
options below for applying clean patches or forcefully applying all patches.
It clones both PG and TDE repositories in the working directory. If TDE path is
specified either with its usage option or via the environment variable, then
the script will use the given TDE source code.
* All working folders folders created will carry "$TDE" as part of the folder name.
* This simplies the manual cleanup process.
OPTIONS can be:
-h Show this message
-a The patches are not applied by default. Specify this to
apply the generated patches. Otherwise, the script will
only perform a dryrun.
-f Force apply patches.
-b [PG_COMMIT_BASE] PG base commit hash/branch/tag for patch [REQUIRED]
-l [PG_COMMIT_LATEST] PG lastest commit hash/branch/tag for patch [REQUIRED]
-x [TDE_COMMIT] TDE commit hash/branch/tag to apply patch on [REQUIRED]
-t [USER_TDE_DIR] Source directory for TDE [Default: Cloned under WORKING_DIR]
-w [WORKING_DIR] Script working folder [Default: $WORKING_DIR]
* a folder where patches and relevant log
files may be created. This folder will not be removed
by the script, so better to keep it in the temp folder.
EOF
# Only a non-zero code ends the script here.
if [[ $errorCode -ne 0 ]];
then
exit_script $errorCode
fi
}
# Remind the user about the working folder (if it exists) and terminate.
#   $1 - exit status to terminate with (defaults to 0)
exit_script()
{
    local status=${1:-0}

    # Nothing to report when the working folder was never created.
    if [[ ! -d $WORKING_DIR ]]; then
        exit $status
    fi

    # The folder is never removed automatically, so point the user at it.
    printf "\n%20s\n" | tr " " "-"
    printf "The following folder was created by the script and may require manual removal.\n"
    printf "* %s\n" $WORKING_DIR
    printf "%20s\n" | tr " " "-"

    exit $status
}
# Abort the script when a preceding source checkout failed.
#   $1 - the commit hash/branch/tag that was checked out
#   $2 - exit status returned by the checkout command
checkout_validate()
{
    commit=$1
    retval=$2

    # Fix: the original tested the misspelled (and therefore always empty)
    # $rteval, so a failed checkout was never detected.
    if [[ $retval -ne 0 ]];
    then
        printf "%s is not a valid commit hash/branch/tag.\n" "$commit"
        exit_script $retval
    fi
}
# Validate arguments to ensure that we can safely run the script.
# Every failure prints a message to stderr and then calls "usage 1", which
# prints the help text and terminates the script with status 1.
validate_args()
{
    local USAGE_TEXT="See usage for details."
    local PATH_ERROR_TEXT="path is not a valid directory."

    # The message variables hold several words; they must be quoted, otherwise
    # printf receives one argument per word and repeats its format string.

    # A user supplied TDE directory must exist; without one a TDE commit is required.
    if [[ -n "$USER_TDE_DIR" ]];
    then
        if [[ ! -d "$USER_TDE_DIR" ]];
        then
            printf "TDE %s %s\n" "$PATH_ERROR_TEXT" "$USAGE_TEXT" >&2
            usage 1
        fi
    elif [[ -z "$TDE_COMMIT" ]];
    then
        printf "TDE_COMMIT is not specified. %s\n" "$USAGE_TEXT" >&2
        usage 1
    fi

    if [[ ! -d "$WORKING_DIR" ]];
    then
        printf "Working folder %s %s\n" "$PATH_ERROR_TEXT" "$USAGE_TEXT" >&2
        usage 1
    fi

    if [[ -z "$PG_COMMIT_BASE" ]];
    then
        printf "PG_COMMIT_BASE is not specified. %s\n" "$USAGE_TEXT" >&2
        usage 1
    fi

    if [[ -z "$PG_COMMIT_LATEST" ]];
    then
        printf "PG_COMMIT_LATEST is not specified. %s\n" "$USAGE_TEXT" >&2
        usage 1
    fi
}
# Show which PG heap file corresponds to which TDE file, headers first and
# C files second. The pg_* and tde_* arrays are read in lockstep by index.
print_map()
{
    local idx

    printf "\n"
    printf "%50s\n" | tr " " "="
    printf "%s\n" "Heap Access to TDE File Map"
    printf "%50s\n\n" | tr " " "="

    printf "%s\n" "--- Header Files ---"
    for idx in "${!pg_header_file_map[@]}"; do
        printf "* %-20s --> %s\n" ${pg_header_file_map[$idx]} ${tde_header_file_map[$idx]}
    done

    printf "\n"
    printf "%s\n" "--- C Files ---"
    for idx in "${!pg_c_file_map[@]}"; do
        printf "* %-20s --> %s\n" ${pg_c_file_map[$idx]} ${tde_c_file_map[$idx]}
    done

    printf "\n\n"
}
# Copy the named files from the current directory tree into a separate folder.
# File names are expected to be unique within the tree.
#   $1   - destination folder
#   $2.. - file names to look up and copy
# The script is terminated through exit_script when find reports a failure.
copy_files()
{
    local target_dir=$1
    shift

    local name
    retval=0
    for name in "$@"; do
        find * -name $name -exec cp -rpv {} $target_dir \;
        retval=$?
        [[ $retval -eq 0 ]] || exit_script $retval
    done
}
# Write the unified diff between two files into a patch file.
#   $1 - base version of the file
#   $2 - latest version of the file
#   $3 - patch file to create
# An empty patch is deleted again; a non-empty one bumps total_patches.
generate_file_patch()
{
    local old_file=$1
    local new_file=$2
    local out_patch=$3

    diff -u $old_file $new_file > $out_patch

    if [[ -s $out_patch ]]; then
        total_patches=$((total_patches + 1))
    else
        rm -fv $out_patch
    fi
}
# Dry-run a patch against a TDE file and apply it when allowed.
#   $1 - name of the TDE file to patch (located with find)
#   $2 - patch file to apply
# The patch is applied when APPLY_PATCHES_FORCE is 1, or when the dry run
# succeeded and TDE_DRY_RUN is empty. A failed dry run is counted and the
# (patch, file) pair is appended to patch_list_unclean.
apply_file_patch()
{
    local file_to_patch=$1
    local patch_file=$2
    local apply_patch=${APPLY_PATCHES_FORCE}

    echo "===> $APPLY_PATCHES_FORCE ==> $apply_patch"

    # No patch was generated for this file: nothing to do.
    if [[ ! -f $patch_file ]]; then
        return 0
    fi

    if ! find * -name $file_to_patch | xargs -I{} echo "patch -p1 -t --dry-run {} $patch_file" | sh; then
        total_patches_failed=$((total_patches_failed + 1))
        patch_list_unclean+=($(basename $patch_file) $(basename $file_to_patch))
    elif [[ -z "$TDE_DRY_RUN" ]]; then
        apply_patch=1
    fi

    echo "ABOUT TO APPLY PATCH"
    if [[ $apply_patch -eq 1 ]]; then
        echo "APPLYING PACH"
        find * -name $file_to_patch | xargs -I{} echo "patch -p1 -t {} $patch_file" | sh
    fi
}
# Generate file-wise patches between PG_COMMIT_BASE and PG_COMMIT_LATEST.
# Clones postgres into the current directory, copies the mapped heap files of
# both commits into FILES_BASE_DIR / FILES_LATEST_DIR and writes one patch per
# file into FILES_PATCH_DIR.
generate_pg_patches()
{
    retval=0

    mkdir $FILES_BASE_DIR $FILES_LATEST_DIR $FILES_PATCH_DIR

    git clone https://github.com/postgres/postgres.git

    pushd postgres

    # Base commit. "git checkout ." first drops any change introduced by the
    # local git configuration.
    git checkout .
    git checkout $PG_COMMIT_BASE
    checkout_validate $PG_COMMIT_BASE $?
    copy_files "$WORKING_DIR/$FILES_BASE_DIR" "${pg_header_file_map[@]}"
    copy_files "$WORKING_DIR/$FILES_BASE_DIR" "${pg_c_file_map[@]}"

    # Latest commit, same procedure.
    git checkout .
    git checkout $PG_COMMIT_LATEST
    checkout_validate $PG_COMMIT_LATEST $?
    copy_files "$WORKING_DIR/$FILES_LATEST_DIR" "${pg_header_file_map[@]}"
    copy_files "$WORKING_DIR/$FILES_LATEST_DIR" "${pg_c_file_map[@]}"

    popd

    # One patch per file: header files first, then the C files.
    local name
    for name in "${pg_header_file_map[@]}" "${pg_c_file_map[@]}"; do
        generate_file_patch "$FILES_BASE_DIR/$name" "$FILES_LATEST_DIR/$name" "$FILES_PATCH_DIR/$name.patch"
    done
}
# Apply the generated patches to the TDE sources.
# Clones pg_tde when TDE_DIR does not exist yet, checks out TDE_COMMIT and
# then hands every (TDE file, patch) pair to apply_file_patch.
tde_apply_patches()
{
    local idx

    # Clone the TDE sources unless the folder is already there.
    if [[ ! -d "$TDE_DIR" ]]; then
        git clone https://github.com/Percona-Lab/pg_tde.git $(basename $TDE_DIR)
    fi

    pushd $TDE_DIR

    git checkout $TDE_COMMIT
    checkout_validate $TDE_COMMIT $?

    # Header files
    for idx in "${!pg_header_file_map[@]}"; do
        patch_file=$WORKING_DIR/$FILES_PATCH_DIR/${pg_header_file_map[$idx]}.patch
        apply_file_patch ${tde_header_file_map[$idx]} $patch_file
    done

    # C files
    for idx in "${!pg_c_file_map[@]}"; do
        patch_file=$WORKING_DIR/$FILES_PATCH_DIR/${pg_c_file_map[$idx]}.patch
        apply_file_patch ${tde_c_file_map[$idx]} $patch_file
    done
}
# Check options passed in.
# Fixes compared to the previous version:
#  * -w now sets WORKING_DIR (it used to set the unused WORK_DIR)
#  * -t now also sets USER_TDE_DIR, the variable validate_args inspects
#  * the default TDE_DIR follows a WORKING_DIR changed with -w
#  * -h exits with 0 and an invalid option with 1 (they were swapped)
#  * the stray space in the option string is gone
while getopts "haft:b:l:w:x:" OPTION
do
    case $OPTION in
        h)
            usage
            exit_script 0
            ;;
        a)
            TDE_DRY_RUN=""
            ;;
        f)
            APPLY_PATCHES_FORCE=1
            ;;
        b)
            PG_COMMIT_BASE=$OPTARG
            ;;
        l)
            PG_COMMIT_LATEST=$OPTARG
            ;;
        t)
            USER_TDE_DIR=$OPTARG
            TDE_DIR=$OPTARG
            ;;
        w)
            WORKING_DIR=$OPTARG
            ;;
        x)
            TDE_COMMIT=$OPTARG
            ;;
        *)
            # getopts reports an unknown option or a missing argument as "?"
            usage
            exit_script 1
            ;;
    esac
done

# Without an explicit TDE directory the sources are cloned under the working
# folder, which may just have been changed by -w.
if [[ -z "$USER_TDE_DIR" ]];
then
    TDE_DIR="${WORKING_DIR}/tde"
fi
# Check the configuration and show the PG -> TDE file mapping
validate_args
print_map

# Everything else runs inside the working directory
pushd $WORKING_DIR

# Create the per-file PG patches, then try them on the TDE sources
generate_pg_patches
tde_apply_patches

# Final report
printf "\nJob completed!\n"
printf "\n\n"
printf "%50s\n" | tr " " "="
printf "RESULT SUMMARY\n"
printf "%50s\n" | tr " " "="
printf "Patches Generated = %s\n" $total_patches
printf "Patches Applied = %s\n" $((total_patches - total_patches_failed))
printf "Patches Failed = %s\n" $total_patches_failed

if [[ ${#patch_list_unclean[@]} -gt 0 ]]; then
    printf "=> Failed Patch List\n"
fi

# patch_list_unclean stores (patch, file) pairs back to back, hence the step of 2.
for (( i=0; i < ${#patch_list_unclean[@]}; i+=2 )); do
    printf "* %s --> %s\n" ${patch_list_unclean[$i]} ${patch_list_unclean[$((i + 1))]}
done

# Print the cleanup reminder and leave with success.
exit_script 0

@ -0,0 +1,25 @@
# Rename rules that turn upstream heap identifiers into their pg_tde names.
# The rules run top to bottom on every input line, so the repair rules at the
# end of the file must stay after the generic ones that they correct.
#
# These first few lines are only for the initial run, but should be harmless in later runs
# "heap_" becomes "tdeheap_" when it follows a tab, a space, "*", "(", the
# start of the line, "_", "-", "+" or "!".
s/\theap_/\ttdeheap_/g
s/\t\*heap_/\t*tdeheap_/g
s/ heap_/ tdeheap_/g
s/ \*heap_/ *tdeheap_/g
# NOTE(review): the replacement below also inserts a space before "(" - confirm this is intended.
s/(heap_/ (tdeheap_/g
s/^heap_/tdeheap_/g
s/_heap_/_tdeheap_/g
s/-heap_/-tdeheap_/g
s/+heap_/+tdeheap_/g
s/!heap_/!tdeheap_/g
# Remaining prefixes and individual names, replaced wherever they occur.
s/heapam_/pg_tdeam_/g
s/heap2_/tdeheap2_/g
s/heapgettup/tdeheapgettup/g
s/heapgetpage/tdeheapgetpage/g
s/visibilitymap_/tdeheap_visibilitymap_/g
s/RelationPutHeapTuple/tdeheap_RelationPutHeapTuple/g
s/RelationGetBufferForTuple/tdeheap_RelationGetBufferForTuple/g
s/TTSOpsBufferHeapTuple/TTSOpsTDEBufferHeapTuple/g
s/TTS_IS_BUFFERTUPLE/TTS_IS_TDE_BUFFERTUPLE/g
s/toast_tuple_externalize/tdeheap_toast_tuple_externalize/g
# Repairing error by earlier rule
# The rules above also rewrite these three names; change them back to the
# original heap spelling.
s/num_tdeheap_tuples/num_heap_tuples/g
s/pgstat_update_tdeheap_dead_tuples/pgstat_update_heap_dead_tuples/g
s/tdeheap_xlog_deserialize_prune_and_freeze/heap_xlog_deserialize_prune_and_freeze/g

@ -0,0 +1,198 @@
# Simple helper script for upstream merges to the copied heap code
# It implements a few simple steps which can be used to automate
# most operations
#
# Generally this script assumes that pg_tde is checked out as a
# submodule inside postgres, in the contrib/pg_tde directory.
#
# Most methods interact with the currently checked out version
# of postgres, this part is not automated at all. Select the
# correct commit before executing functions!
#
# == copy <dst_folder>
#
# Copies the required heapam source files from the postgres repo,
# to the specified <dst_folder> inside the pg_tde repo. Also
# renames the files, places them in the correct directory, and
# runs the automatic sed replacement script.
#
# The sed replacements only cover the name changes, mainly changing "heap"
# to "tdeheap". It doesn't apply the actual encryption changes!
#
# It also creates a file named "COMMIT" in the directory, which contains the
# commit hash used.
#
# == diff <folder1> <folder2> <diff_folder>
#
# Runs diff on the tdeheap files between <folder1> and <folder2>, and places
# the results into <diff_folder>
#
# The assumption is that <folder1> contains the copied, but not TDEfied
# version of the files, while <folder2> is the actual current TDEfied code,
# and that way this command creates the "tde patch" for the given commit.
#
# For example, assuming that we have the PG16 tde sources in the src16
# directory, these steps create a diff for the current sources:
# 1. check out the src16/COMMIT commit
# 2. run `copy tmp_16dir`
# 3. run `diff tmp_16dir src16 diff16`
# 4. delete the tmp_16dir directory
#
# == apply <target_folder> <diff_folder>
#
# Applies the diffs created by the diff command from the <diff_folder> to the
# <target_folder> source directory.
#
# When the diff can't be applied cleanly, and there are conflicts, it still
# writes the file with conflicts, using the diff3 format (usual git conflict
# markers), which can be resolved manually.
#
# The recommended action in this case is to first create a commit with the
# conflicts as-is, and then create a separate commit with the conflicts
# resolved and the code working.
#
# This is mainly intended for version upgrades.
# For example, if the current version is 16, and the goal is creating the 17
# version:
# 1. create the src16 diff using the steps described in the `diff` section
# 2. checkout the 17 version in the postgres repo
# 3. use the copy command to create a base directory for the 17 version
# 4. create a commit with the src17 basefiles
# 5. use the apply command to apply the patches
# 6. commit things with conflicts
# 7. resolve the conflicts as needed
# 8. commit resolved/working sources
import shutil
import os
import subprocess
import sys
# Directory containing this script, with symlinks resolved.
tools_directory = os.path.dirname(os.path.realpath(__file__))

# pg_tde is expected to live in contrib/pg_tde of a postgres checkout, which
# puts the postgres root three levels above the tools directory.
# Every path below ends with "/" because callers append names by plain
# string concatenation.
pg_root = f"{tools_directory}/../../../"
heapam_src_dir = f"{pg_root}src/backend/access/heap/"
heapam_inc_dir = f"{pg_root}src/include/access/"

# Root of the pg_tde repository (parent of the tools directory).
tde_root = f"{tools_directory}/../"
# Upstream heap access header name -> name of its copy inside pg_tde.
heapam_headers = {
    "visibilitymap.h": "pg_tde_visibilitymap.h",
    "rewriteheap.h": "pg_tde_rewrite.h",
    "heapam_xlog.h": "pg_tdeam_xlog.h",
    "hio.h": "pg_tde_io.h",
    "heapam.h": "pg_tdeam.h",
    "heaptoast.h": "pg_tdetoast.h",
}
# Upstream heap access source file name -> name of its copy inside pg_tde.
heapam_sources = {
    "heapam.c": "pg_tdeam.c",
    "heapam_handler.c": "pg_tdeam_handler.c",
    "heapam_visibility.c": "pg_tdeam_visibility.c",
    "heaptoast.c": "pg_tdetoast.c",
    "hio.c": "pg_tde_io.c",
    "pruneheap.c": "pg_tde_prune.c",
    "rewriteheap.c": "pg_tde_rewrite.c",
    "vacuumlazy.c": "pg_tde_vacuumlazy.c",
    "visibilitymap.c": "pg_tde_visibilitymap.c",
}
def copy_and_sed_things(files, src, dst):
    """Copy upstream files under their pg_tde names and run repl.sed on them.

    Args:
        files: dict mapping the upstream file name to the pg_tde file name.
        src: source directory, ending with a path separator.
        dst: destination directory, ending with a path separator; created
            when it does not exist.

    Raises:
        subprocess.CalledProcessError: if sed exits with a non-zero status.
    """
    os.makedirs(dst, exist_ok=True)
    sed_script = tools_directory + "/repl.sed"
    for original, copy in files.items():
        print(" - ", original, "=>", copy)
        shutil.copyfile(src + original, dst + copy)
        # check_call instead of call: a failed rename pass used to go
        # unnoticed and left a file that still carried upstream names.
        subprocess.check_call(["sed", "-i", "-f", sed_script, dst + copy])
def copy_upstream_things(dstdir):
    """Copy the upstream heap files into <tde_root>/<dstdir> and record the commit.

    Headers go to <dstdir>/include/access/ and sources to <dstdir>/access/.
    The hash of the commit currently checked out in the postgres repository
    is written to <dstdir>/COMMIT.

    Args:
        dstdir: destination directory, relative to the pg_tde root.
    """
    print("Processing headers")
    copy_and_sed_things(heapam_headers, heapam_inc_dir, tde_root + dstdir + "/include/access/")
    print("Processing sources")
    copy_and_sed_things(heapam_sources, heapam_src_dir, tde_root + dstdir + "/access/")

    # Also create a commit file.
    # git runs with cwd=pg_root instead of changing the directory of this
    # process, so a git failure can no longer leave it in the wrong place.
    commit_hash = subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=pg_root)
    with open(tde_root + dstdir + "/COMMIT", "w") as f:
        f.write(commit_hash.decode("utf-8"))
def save_diffs(files, src, dst, diffdir):
    """Write one unified diff per file into <tde_root>/<diffdir>.

    Args:
        files: dict mapping the upstream file name to the pg_tde file name;
            only the pg_tde names are used.
        src: directory with the first version, relative to the pg_tde root.
        dst: directory with the second version, relative to the pg_tde root.
        diffdir: output directory, relative to the pg_tde root; created when
            it does not exist. Each patch is stored as <name>.patch.
    """
    os.makedirs(tde_root + "/" + diffdir, exist_ok=True)
    for copy in files.values():
        print(" - ", copy + ".patch")
        diff = subprocess.run(
            ["diff", "-u", tde_root + src + "/" + copy, tde_root + dst + "/" + copy],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
        )
        # diff exits with 0 (no differences) or 1 (differences found); any
        # other status is a real failure such as a missing file. Report it
        # instead of silently writing an empty patch.
        if diff.returncode > 1:
            print(diff.stderr.decode("utf-8"), file=sys.stderr)
        with open(tde_root + "/" + diffdir + "/" + copy + ".patch", "w") as f:
            f.write(diff.stdout.decode("utf-8"))
def diff_things(src, dst, diffdir):
    """Create the patches for all headers, then for all sources.

    Args:
        src: directory with the first version, relative to the pg_tde root.
        dst: directory with the second version, relative to the pg_tde root.
        diffdir: directory receiving the patches, relative to the pg_tde root.
    """
    steps = (
        ("Processing headers", heapam_headers, "/include/access/"),
        ("Processing sources", heapam_sources, "/access/"),
    )
    for banner, file_map, subdir in steps:
        print(banner)
        save_diffs(file_map, src + subdir, dst + subdir, diffdir)
def apply_diffs(files, dst, diffdir):
    """Apply <name>.patch from <diffdir> to every mapped file in <dst>.

    patch runs with --merge=diff3, and its stdout and stderr are printed
    for each file.

    Args:
        files: dict mapping the upstream file name to the pg_tde file name;
            only the pg_tde names are used.
        dst: directory holding the files to patch, relative to the pg_tde root.
        diffdir: directory holding the patches, relative to the pg_tde root.
    """
    for copy in files.values():
        patch_name = copy + ".patch"
        print(" - ", patch_name)
        target_path = tde_root + dst + "/" + copy
        patch_path = tde_root + "/" + diffdir + "/" + patch_name
        result = subprocess.run(
            ["patch", "--merge=diff3", "-l", "--no-backup-if-mismatch", target_path, patch_path],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
        )
        for stream in (result.stdout, result.stderr):
            print(stream.decode("utf-8"))
def apply_things(dst, diffdir):
    """Apply the stored patches to all headers, then to all sources.

    Args:
        dst: source directory to patch, relative to the pg_tde root.
        diffdir: directory holding the patches, relative to the pg_tde root.
    """
    steps = (
        ("Processing headers", heapam_headers, "/include/access/"),
        ("Processing sources", heapam_sources, "/access/"),
    )
    for banner, file_map, subdir in steps:
        print(banner)
        apply_diffs(file_map, dst + subdir, diffdir)
def rm_files(files, src):
    """Delete every mapped pg_tde file from <tde_root>/<src>.

    Args:
        files: dict mapping the upstream file name to the pg_tde file name;
            only the pg_tde names are used.
        src: directory holding the files, relative to the pg_tde root.
    """
    for copy in files.values():
        print(" - RM ", copy)
        victim = tde_root + src + "/" + copy
        os.remove(victim)
def rm_things(srcdir):
    """Remove the copied headers, then the copied sources, from <srcdir>.

    Args:
        srcdir: source directory, relative to the pg_tde root.
    """
    steps = (
        ("Processing headers", heapam_headers, "/include/access/"),
        ("Processing sources", heapam_sources, "/access/"),
    )
    for banner, file_map, subdir in steps:
        print(banner)
        rm_files(file_map, srcdir + subdir)
def main(argv):
    """Dispatch the command line to the copy / diff / apply / rm commands.

    Args:
        argv: full argument vector; argv[0] is the script name and argv[1]
            the command, followed by the arguments of that command.

    Raises:
        SystemExit: with status 1 when the command is missing or unknown, or
            when it has too few arguments.
    """
    if len(argv) < 2:
        print("No command given! Commands:")
        print(" - copy")
        print(" - diff")
        print(" - apply")
        print(" - rm ")
        sys.exit(1)

    command = argv[1]
    # A plain if/elif chain: each handler is only looked up when its command
    # is actually selected.
    if command == "copy":
        if len(argv) < 3:
            print("No target directory given!")
            print("Usage: tool.py copy <dstdir>")
            sys.exit(1)
        copy_upstream_things(argv[2])
    elif command == "diff":
        if len(argv) < 5:
            print("Not enough parameters!")
            print("Usage: tool.py diff <copied_dir> <current_dir> <diff_dir>")
            sys.exit(1)
        diff_things(argv[2], argv[3], argv[4])
    elif command == "apply":
        if len(argv) < 4:
            print("Not enough parameters!")
            print("Usage: tool.py apply <src_dir> <diff_dir>")
            sys.exit(1)
        apply_things(argv[2], argv[3])
    elif command == "rm":
        if len(argv) < 3:
            print("No target directory given!")
            print("Usage: tool.py rm <dstdir>")
            sys.exit(1)
        rm_things(argv[2])
    else:
        # Unknown commands used to be ignored silently.
        print("Unknown command: " + command)
        sys.exit(1)


if __name__ == "__main__":
    main(sys.argv)
Loading…
Cancel
Save