From 1fd08382f2ad89f6bb09d0970912f95a7d3d30d6 Mon Sep 17 00:00:00 2001 From: Michele Locati Date: Fri, 9 Mar 2018 18:09:56 +0100 Subject: Initial version --- README.md | 32 +++++ bin/incremental-git-filterbranch.sh | 254 ++++++++++++++++++++++++++++++++++++ 2 files changed, 286 insertions(+) create mode 100644 README.md create mode 100755 bin/incremental-git-filterbranch.sh diff --git a/README.md b/README.md new file mode 100644 index 0000000..b29b493 --- /dev/null +++ b/README.md @@ -0,0 +1,32 @@ +## Introduction + +[`git filter-branch`](https://git-scm.com/docs/git-filter-branch) is a really nice git feature. +For instance, it allows fancy stuff like subtree-splitting. + +Problems may arise when the repository contains a lot of commits: this operation can take a lot of time. + +Luckily recent versions of git allow us to perform this operation in an incremental way: +the first time `filter-branch` still requires some time, but following calls can be very fast. + + +## Requirements + +- recent bash shell +- git 2.5.0 or newer. + +## Usage + +Get the script, and customize the variables you can find at its beginning. + + +## Legal stuff + +Use at your own risk. +[MIT License](https://github.com/mlocati/incremental-git-filter-branch/blob/master/LICENSE). + + +## Credits + +Special thanks to [Ian Campbell](https://github.com/ijc) for the implementation of the `--state-branch` option of git, +and his hints about how it can be used. +This script works only thanks to him (and if it doesn't work I'm the only person to blame). 
diff --git a/bin/incremental-git-filterbranch.sh b/bin/incremental-git-filterbranch.sh new file mode 100755 index 0000000..c582faf --- /dev/null +++ b/bin/incremental-git-filterbranch.sh @@ -0,0 +1,254 @@ +#!/usr/bin/env bash +# +# ### AUTHORS ### +# +# - Michele Locati +# +# +# ### LICENSE ### +# +# MIT - https://github.com/mlocati/incremental-git-filter-branch/blob/master/LICENSE +# +# +# ### CONFIGURATION ### +# +# The source repository (it is cloned as a local mirror inside WORK_DIRECTORY and updated on later runs) +SOURCE_REPOSITORY_URL=https://github.com/mlocati/incremental-git-filter-branch.git +# The destination repository (the filtered branches are force-pushed to it) +DESTINATION_REPOSITORY_URL=git@github.com:your/repository.git +# The filter to be applied: options for git filter-branch (the value is split on whitespace when passed to git) +FILTER='--subdirectory-filter bin' +# The path to a local directory where we'll process the repositories (created if it does not exist) +WORK_DIRECTORY="$(pwd)/temp" +# A space-separated list of branches to limit filtering to (empty: no limit; can not be combined with BRANCH_BLACKLIST) +BRANCH_WHITELIST='' +# A space-separated list of branches to be skipped (can not be combined with BRANCH_WHITELIST) +BRANCH_BLACKLIST='' +# + + +# Exit immediately if a pipeline, a list, or a compound command, exits with a non-zero status. +set -o errexit +# Any trap on ERR is inherited by shell functions, command substitutions, and commands executed in a subshell environment. +set -o errtrace +# The return value of a pipeline is the value of the last (rightmost) command to exit with a non-zero status, or zero if all commands in the pipeline exit successfully. +set -o pipefail +# Treat unset variables and parameters other than the special parameters "@" and "*" as an error when performing parameter expansion. 
set -o nounset

# Print a message to stderr and terminate the script with status 1.
# Must be called from the main shell: inside a command substitution it
# would only terminate the subshell.
#
# Arguments: $1 - the message to print
function fail {
    printf '%s\n' "${1}" >&2
    exit 1
}

# Abort the script if the named variable is unset or empty.
#
# Arguments: $1 - the NAME of the variable to check
function requireVariable {
    local NAME="${1}"
    if [[ -z "${!NAME-}" ]]; then
        fail "Missing variable: ${NAME}"
    fi
}

# Normalize a user-supplied list of branch names.
# Every run of spaces, tabs, carriage returns and newlines becomes a single
# space; a non-empty result is wrapped in one leading and one trailing space
# so that a branch can be looked up with the pattern *" name "*.
#
# Arguments: $1 - the raw list
# Outputs:   the normalized list (nothing at all for an empty list)
function normalizeBranchList {
    local LIST
    # tr works on the whole stream, so newlines are collapsed too (a
    # line-oriented sed substitution would never see them).
    LIST=$(printf '%s' "${1}" | tr -s ' \t\r\n' ' ')
    LIST=$(trim "${LIST}")
    if [[ -n "${LIST}" ]]; then
        printf ' %s ' "${LIST}"
    fi
}

# Validate the configuration and derive the working paths.
#
# Globals read:    SOURCE_REPOSITORY_URL, DESTINATION_REPOSITORY_URL, FILTER,
#                  WORK_DIRECTORY
# Globals written: BRANCH_WHITELIST, BRANCH_BLACKLIST (normalized),
#                  SOURCE_REPOSITORY_DIR, WORKER_REPOSITORY_DIR
# Exits with status 1 when a required variable is missing or when both the
# whitelist and the blacklist are configured.
function setupEnvironment {
    echo '# Setting up environment'
    requireVariable SOURCE_REPOSITORY_URL
    requireVariable DESTINATION_REPOSITORY_URL
    requireVariable FILTER
    requireVariable WORK_DIRECTORY
    BRANCH_WHITELIST=$(normalizeBranchList "${BRANCH_WHITELIST-}")
    BRANCH_BLACKLIST=$(normalizeBranchList "${BRANCH_BLACKLIST-}")
    if [[ -n "${BRANCH_WHITELIST}" && -n "${BRANCH_BLACKLIST}" ]]; then
        fail 'You can not specify BRANCH_WHITELIST and BRANCH_BLACKLIST variables'
    fi
    # Directory names are derived from the URLs, so different source and
    # destination pairs get different directories inside WORK_DIRECTORY.
    SOURCE_REPOSITORY_DIR=${WORK_DIRECTORY}/source-$(md5 "${SOURCE_REPOSITORY_URL}")
    WORKER_REPOSITORY_DIR=${WORK_DIRECTORY}/worker-$(md5 "${SOURCE_REPOSITORY_URL}${DESTINATION_REPOSITORY_URL}")
    mkdir --parents --mode=0770 -- "${WORK_DIRECTORY}"
}

# Take an exclusive lock on "<WORKER_REPOSITORY_DIR>.lock", waiting until it
# becomes available. File descriptor 9 is opened on the lock file and is not
# closed by this function.
#
# Globals read: WORKER_REPOSITORY_DIR
function acquireLock {
    echo '# Checking concurrency'
    local LOCK_FILE=${WORKER_REPOSITORY_DIR}.lock
    local TIMEOUT=3
    # Without this check a missing flock would look like a busy lock and the
    # loop below would never end.
    if ! command -v flock >/dev/null; then
        fail 'Missing required command: flock'
    fi
    exec 9>"${LOCK_FILE}"
    until flock --exclusive --timeout "${TIMEOUT}" 9; do
        echo '... still waiting...'
    done
}

# Make sure SOURCE_REPOSITORY_DIR holds an up-to-date mirror of the source
# repository: update the existing mirror, or (re)create it when it does not
# exist or when the update fails.
#
# Globals read: SOURCE_REPOSITORY_URL, SOURCE_REPOSITORY_DIR
function prepareLocalSourceRepository {
    if [[ -f "${SOURCE_REPOSITORY_DIR}/config" ]]; then
        echo '# Updating source repository'
        if git -C "${SOURCE_REPOSITORY_DIR}" remote update --prune; then
            return 0
        fi
    fi
    echo '# Cloning source repository'
    rm -rf -- "${SOURCE_REPOSITORY_DIR:?}"
    git clone --mirror "${SOURCE_REPOSITORY_URL}" "${SOURCE_REPOSITORY_DIR}"
}

# Store the names of the branches of the source mirror (one per line) in
# SOURCE_BRANCHES, and warn on stderr about whitelisted branches that do not
# exist in the source repository.
#
# Globals read:    SOURCE_REPOSITORY_DIR, BRANCH_WHITELIST
# Globals written: SOURCE_BRANCHES
# Exits with status 1 when no branch can be listed.
function getSourceRepositoryBranches {
    echo '# Listing source branches'
    # show-ref prints "<hash> refs/heads/<name>": keep only <name>.
    # show-ref fails when there is nothing to list; with errexit and pipefail
    # that would end the script before the message below could be printed.
    SOURCE_BRANCHES=$(git -C "${SOURCE_REPOSITORY_DIR}" show-ref --heads | sed -r 's:^[^ ]+ refs/heads/::') || SOURCE_BRANCHES=''
    if [[ -z "${SOURCE_BRANCHES}" ]]; then
        fail 'Failed to retrieve branch list'
    fi
    if [[ -n "${BRANCH_WHITELIST}" ]]; then
        local SOURCE_BRANCH
        local MISSING_BRANCHES="${BRANCH_WHITELIST}"
        # Unquoted on purpose: one branch name per word.
        for SOURCE_BRANCH in ${SOURCE_BRANCHES}; do
            # The quoted pattern is matched literally, so characters such as
            # "." in a branch name are not treated as wildcards.
            MISSING_BRANCHES=${MISSING_BRANCHES//" ${SOURCE_BRANCH} "/ }
        done
        MISSING_BRANCHES=$(trim "${MISSING_BRANCHES}")
        if [[ -n "${MISSING_BRANCHES}" ]]; then
            printf 'These branches specified in BRANCH_WHITELIST were not found in the source repository:\n%s\n' "${MISSING_BRANCHES}" >&2
        fi
    fi
}

# Store the names of the tags of the source mirror (one per line) in
# SOURCE_TAGS; the variable is empty when the repository has no tags.
#
# Globals read:    SOURCE_REPOSITORY_DIR
# Globals written: SOURCE_TAGS
function getSourceRepositoryTags {
    echo '# Listing source tags'
    # show-ref prints "<hash> refs/tags/<name>": keep only <name>.
    # A repository without tags makes show-ref fail: that is not an error
    # here, so it must not trigger errexit.
    SOURCE_TAGS=$(git -C "${SOURCE_REPOSITORY_DIR}" show-ref --tags | sed -r 's:^[^ ]+ refs/tags/::') || SOURCE_TAGS=''
}

# Make sure WORKER_REPOSITORY_DIR holds a usable working repository with the
# remotes "source" (the local mirror) and "destination". An existing
# repository is kept when it passes a connectivity check, otherwise it is
# created again from scratch.
#
# Globals read: SOURCE_REPOSITORY_DIR, WORKER_REPOSITORY_DIR,
#               DESTINATION_REPOSITORY_URL
# Exits with status 1 (after removing the partial clone) on failure.
function prepareWorkerRepository {
    if [[ -f "${WORKER_REPOSITORY_DIR}/.git/config" ]]; then
        echo '# Checking working repository'
        if git -C "${WORKER_REPOSITORY_DIR}" fsck --no-dangling --connectivity-only; then
            return 0
        fi
    fi
    echo '# Creating working repository'
    rm -rf -- "${WORKER_REPOSITORY_DIR:?}"
    echo '# Adding destination to working repository'
    # errexit is not effective inside a tested command, hence the explicit
    # && chain.
    if ! (
        git clone --no-hardlinks --local --origin source "${SOURCE_REPOSITORY_DIR}" "${WORKER_REPOSITORY_DIR}" &&
        git -C "${WORKER_REPOSITORY_DIR}" remote add destination "${DESTINATION_REPOSITORY_URL}" &&
        git -C "${WORKER_REPOSITORY_DIR}" fetch --prune destination
    ); then
        # Do not leave a half-initialized repository behind.
        rm -rf -- "${WORKER_REPOSITORY_DIR:?}"
        fail 'Failed to create the working repository'
    fi
}

# Tell whether a branch is excluded by the whitelist or by the blacklist.
#
# Globals read: BRANCH_WHITELIST, BRANCH_BLACKLIST
# Arguments:    $1 - the branch name
# Outputs:      the reason why the branch must be skipped, or nothing when it
#               has to be processed
function shouldSkipBranch {
    local BRANCH="${1}"
    local RESULT=''
    if [[ -n "${BRANCH_WHITELIST}" ]]; then
        if [[ " ${BRANCH_WHITELIST} " != *" ${BRANCH} "* ]]; then
            RESULT='not in whitelist'
        fi
    elif [[ " ${BRANCH_BLACKLIST} " = *" ${BRANCH} "* ]]; then
        RESULT='in blacklist'
    fi
    printf '%s' "${RESULT}"
}

# Fetch one branch from the source mirror and run git filter-branch on the
# commits that were not filtered by a previous run.
#
# Branches used in the working repository:
#   filter-branch/source/<branch>   - checkout of the source branch
#   filter-branch/result/<branch>   - the branch rewritten by filter-branch
#   filter-branch/filtered/<branch> - the last source commit already handled
#
# Globals read: WORKER_REPOSITORY_DIR, FILTER
# Arguments:    $1 - the branch name
# Returns:      0 when the result branch was rewritten (it has to be pushed),
#               1 when there was nothing new to filter,
#               2 when a git or file-system operation failed.
#
# Callers have to test the return value, and a tested function runs with
# errexit disabled: every command that must not fail is checked explicitly.
function processBranch {
    local BRANCH="${1}"
    local RESULT_BRANCH="filter-branch/result/${BRANCH}"
    local FILTERED_BRANCH="filter-branch/filtered/${BRANCH}"
    local TEMP_DIR="${WORKER_REPOSITORY_DIR}.filter-branch"
    local LAST
    local FETCH_HEAD
    local RANGE

    echo ' - fetch'
    git -C "${WORKER_REPOSITORY_DIR}" fetch --quiet --tags source "${BRANCH}" || return 2
    echo ' - checkout'
    git -C "${WORKER_REPOSITORY_DIR}" checkout --quiet --force -B "filter-branch/source/${BRANCH}" "remotes/source/${BRANCH}" || return 2
    echo ' - determining delta'
    # Empty on the first run for this branch (the ref does not exist yet).
    LAST=$(git -C "${WORKER_REPOSITORY_DIR}" show-ref -s "refs/heads/${FILTERED_BRANCH}" || true)
    FETCH_HEAD=$(git -C "${WORKER_REPOSITORY_DIR}" rev-parse FETCH_HEAD) || return 2
    if [[ "${LAST}" = "${FETCH_HEAD}" ]]; then
        echo ' - nothing new, skipping'
        return 1
    fi
    RANGE="${RESULT_BRANCH}"
    if [[ -n "${LAST}" ]]; then
        RANGE="${LAST}..${RANGE}"
    fi

    echo ' - initializing filter'
    rm -f -- "${WORKER_REPOSITORY_DIR}/.git/refs/filter-branch/originals/${BRANCH}/refs/heads/${RESULT_BRANCH}" || return 2
    git -C "${WORKER_REPOSITORY_DIR}" branch --force "${RESULT_BRANCH}" FETCH_HEAD || return 2
    rm -rf -- "${TEMP_DIR:?}" || return 2
    echo ' - filtering commits'
    local FOUND_SOMETHING=1
    # FILTER is expanded unquoted on purpose: it holds several options.
    # NOTE(review): any failure of filter-branch is treated like "Found
    # nothing to rewrite"; the two cases can not be told apart by exit status.
    # shellcheck disable=SC2086
    git -C "${WORKER_REPOSITORY_DIR}" filter-branch \
        ${FILTER} \
        --tag-name-filter cat \
        --prune-empty \
        -d "${TEMP_DIR}" \
        --original "refs/filter-branch/originals/${BRANCH}" \
        --state-branch "refs/filter-branch/states/${BRANCH}" \
        -- "${RANGE}" \
        || FOUND_SOMETHING=0
    echo ' - storing state'
    git -C "${WORKER_REPOSITORY_DIR}" branch -f "${FILTERED_BRANCH}" FETCH_HEAD || return 2
    if [[ ${FOUND_SOMETHING} -eq 1 ]]; then
        return 0
    fi
    return 1
}

# Process every source branch that is not excluded, then force-push the
# rewritten branches (and the tags) to the destination repository.
#
# Globals read: SOURCE_BRANCHES, WORKER_REPOSITORY_DIR
# Exits with status 1 when a branch can not be processed.
function processBranches {
    local BRANCH
    local SKIP_REASON
    local STATUS
    local -a PUSH_REFSPECS=()
    # Unquoted on purpose: one branch name per word.
    for BRANCH in ${SOURCE_BRANCHES}; do
        echo "# Processing branch ${BRANCH}"
        SKIP_REASON=$(shouldSkipBranch "${BRANCH}")
        if [[ -n "${SKIP_REASON}" ]]; then
            echo " - not to be processed (${SKIP_REASON})"
            continue
        fi
        STATUS=0
        processBranch "${BRANCH}" || STATUS=$?
        case "${STATUS}" in
            0)
                PUSH_REFSPECS+=("filter-branch/result/${BRANCH}:${BRANCH}")
                ;;
            1)
                # Nothing new for this branch.
                ;;
            *)
                # Going on could push a result built from stale data.
                fail "Failed to process branch ${BRANCH}"
                ;;
        esac
    done
    if [[ ${#PUSH_REFSPECS[@]} -eq 0 ]]; then
        echo "# No branch updated"
    else
        echo "# Pushing to destination repository"
        git -C "${WORKER_REPOSITORY_DIR}" push --quiet --force --tags destination "${PUSH_REFSPECS[@]}"
    fi
}

# Remove the leading and the trailing spaces from a string.
#
# Arguments: $1 - the string
# Outputs:   the trimmed string, without a trailing newline
function trim {
    local STR="${1}"
    # Strip the longest prefix, then the longest suffix, made only of spaces.
    STR="${STR#"${STR%%[! ]*}"}"
    STR="${STR%"${STR##*[! ]}"}"
    printf '%s' "${STR}"
}

# Compute the MD5 checksum of a string.
#
# Arguments: $1 - the string
# Outputs:   the checksum as printed by md5sum, without the file name column
function md5 {
    printf '%s' "${1}" | md5sum | sed -e 's: .*$::'
}

setupEnvironment
acquireLock
prepareLocalSourceRepository
getSourceRepositoryBranches
getSourceRepositoryTags
prepareWorkerRepository
processBranches