#!/usr/bin/env bash
# SPDX-License-Identifier: MIT

# shellcheck disable=SC2016,SC2059

# This script is executed by GitHub Actions for every pull request opened.
# It currently accomplishes the following objectives:
#
#  1. Detect pages that were just copied (i.e. cp pages/{common,linux}/7z.md).
#  2. Detect English pages that were added in a platform specific directory although
#     they already exist under 'common'.
#  3. Detect translated pages that do not exist as English pages yet.
#  4. Detect outdated pages. A page is marked as outdated when the number of
#     commands differs from the number of commands in the English page or the
#     contents of the commands differ from the English page.
#  5. Detect other miscellaneous anomalies in the pages folder.
#
# Results are printed to stdout, logs and errors to stderr.
#
# Usage: pass -v to write an execution trace (set -x) to ./debug.log.
#
# NOTE: must be run from the repository root directory to correctly work!
# NOTE: no `set -e`, failure of this script should not invalidate the build.

VERBOSE=false

# The leading ':' in the optstring selects getopts' silent mode, so unknown
# options land in the catch-all arm instead of triggering bash's own message.
while getopts ":v" opt; do
  case "$opt" in
    v)
      VERBOSE=true
      ;;
    *)
      # This is a diagnostic, not a result: keep it off stdout.
      echo "This argument is not valid for this script." >&2
      ;;
  esac
done

if [[ $VERBOSE == true ]]; then
  DEBUG_LOG="debug.log"
  # Start from an empty log file on every run.
  rm -f "$DEBUG_LOG" && touch "$DEBUG_LOG"
  # Let bash allocate a file descriptor for the log and send the `set -x`
  # trace there instead of to stderr.
  exec {BASH_XTRACEFD}> "$DEBUG_LOG"
  export BASH_XTRACEFD
  set -x
fi

# Check for duplicated pages.
#######################################
# Report an English page that was added under a platform directory although a
# page with the same file name already exists under 'pages/common'.
# Globals:   MSG_EXISTS (read)
# Arguments: $1 - page path in the format 'pages<.language_code>/platform/pagename.md'
# Outputs:   one result line on stdout when a duplicate is found
# Returns:   1 when the page is not under 'pages/' (only English is checked)
#######################################
function check_duplicates {
  local page="$1"
  local parts

  # Split the path on '/' into (language folder, platform, file name).
  IFS='/' read -r -a parts <<< "$page"

  local language_folder="${parts[0]}"
  if [[ $language_folder != "pages" ]]; then # only check for duplicates in English
    return 1
  fi

  local platform="${parts[1]}"
  local file="${parts[2]}"

  # Pages that live in 'common' themselves are skipped.
  if [[ $platform != "common" && -f "pages/common/$file" ]]; then
    # "\x2d" is a '-' written as an escape so printf does not parse the
    # format string as an option.
    printf "\x2d $MSG_EXISTS" "$page" 'common'
  fi
}

#######################################
# Report a translated page that has no English counterpart.
# Globals:   MSG_NOT_EXISTS (read)
# Arguments: $1 - page path
# Outputs:   one result line on stdout when the English page is missing
# Returns:   1 when the page already is the English page
#######################################
function check_missing_english_page() {
  local page="$1"
  # Replace the leading 'pages<.language_code>/' component with 'pages/'.
  local english_page="pages/${page#pages*\/}"

  if [[ $page == "$english_page" ]]; then
    return 1
  fi

  if [[ ! -f $english_page ]]; then
    printf "\x2d $MSG_NOT_EXISTS" "$page" "$english_page"
  fi
}

#######################################
# Count the lines of a file that match a regular expression.
# Arguments: $1 - file, $2 - grep (basic) regular expression
# Outputs:   the number of matching lines on stdout
#######################################
function count_commands() {
  local file="$1"
  local regex="$2"

  grep -c -- "$regex" "$file"
}

#######################################
# Print the lines of a file that match a regular expression, with the parts
# removed by the sed expressions below blanked out, joined by single spaces
# on one line.
# Arguments: $1 - file, $2 - grep (basic) regular expression
# Outputs:   the normalized commands on stdout
#######################################
function strip_commands() {
  local file="$1"
  local regex="$2"
  local stripped_commands=()

  # One sed process applies the expressions in order to every line:
  #   {{...}} -> {{}}, <...> removed, (...) removed, "..." -> "",
  #   '...' removed, backticks removed.
  mapfile -t stripped_commands < <(
    grep -- "$regex" "$file" |
      sed \
        -e 's/{{[^}]*}}/{{}}/g' \
        -e 's/<[^>]*>//g' \
        -e 's/([^)]*)//g' \
        -e 's/"[^"]*"/""/g' \
        -e "s/'[^']*'//g" \
        -e 's/`//g'
  )

  printf "%s\n" "${stripped_commands[*]}"
}

#######################################
# Report a translated page whose commands no longer match the English page,
# either by count or by normalized content.
# Globals:   MSG_OUTDATED (read)
# Arguments: $1 - page path
# Outputs:   one result line on stdout when the page is outdated
# Returns:   1 when the page is English or has no English counterpart
#######################################
function check_outdated_page() {
  local page="$1"
  local english_page="pages/${page#pages*\/}"
  # A command line is a line consisting solely of one `...` span.
  local command_regex='^`[^`]\+`$'

  if [[ $page == "$english_page" || ! -f $english_page ]]; then
    return 1
  fi

  local english_commands commands english_commands_as_string commands_as_string
  english_commands="$(count_commands "$english_page" "$command_regex")"
  commands="$(count_commands "$page" "$command_regex")"
  english_commands_as_string="$(strip_commands "$english_page" "$command_regex")"
  commands_as_string="$(strip_commands "$page" "$command_regex")"

  if [[ $english_commands != "$commands" ]]; then
    printf "\x2d $MSG_OUTDATED" "$page" "based on number of commands"
  elif [[ "$english_commands_as_string" != "$commands_as_string" ]]; then
    printf "\x2d $MSG_OUTDATED" "$page" "based on the command contents itself"
  fi
}

#######################################
# Report a page that is mentioned in 'more-info-links.txt'.
# Globals:   MSG_MORE_INFO (read)
# Arguments: $1 - page path
# Outputs:   one result line on stdout when the page is listed
#######################################
function check_more_info_link() {
  local page=$1

  # -F: the path is a literal string, not a regex ('.' and '[' occur in page
  # paths); '--' protects against a path starting with '-'.
  if grep -qF -- "$page" "more-info-links.txt"; then
    printf "\x2d $MSG_MORE_INFO" "$page"
  fi
}

#######################################
# Report a page that is mentioned in 'page-titles.txt'.
# Globals:   MSG_PAGE_TITLE (read)
# Arguments: $1 - page path
# Outputs:   one result line on stdout when the page is listed
#######################################
function check_page_title() {
  local page=$1

  # Literal match for the same reason as in check_more_info_link.
  if grep -qF -- "$page" "page-titles.txt"; then
    printf "\x2d $MSG_PAGE_TITLE" "$page"
  fi
}

#######################################
# Look at git diff and check for copied/duplicated pages.
# Runs the per-page checks on every page added, copied or modified relative
# to origin/main.
# Globals:   MSG_IS_COPY (read)
# Outputs:   result lines on stdout, the raw diff on stderr; creates and
#            removes 'more-info-links.txt' and 'page-titles.txt' in the
#            current directory
#######################################
function check_diff {
  local git_diff
  local change file1 file2 percentage

  git_diff="$(git diff --name-status --find-copies-harder --diff-filter=ACM origin/main -- pages*/)"

  if [[ -n $git_diff ]]; then
    printf 'Check PR: git diff:\n%s\n' "$git_diff" >&2
  else
    echo 'Check PR: git diff looks fine, no interesting changes detected.' >&2
    return 0
  fi

  # The helper scripts' output is stored in the files that
  # check_more_info_link and check_page_title search.
  python3 scripts/set-more-info-link.py -Sn > more-info-links.txt
  python3 scripts/set-page-title.py -Sn > page-titles.txt

  # Each --name-status line is '<status>TAB<file1>[TAB<file2>]'.
  while IFS=$'\t' read -r change file1 file2; do
    case "$change" in
      C*) # file2 is a copy of file1
        # The status is 'C' followed by the similarity score; strip up to two
        # leading zeros so the '%d' in MSG_IS_COPY gets a plain decimal.
        percentage=${change#C}
        percentage=${percentage#0}
        percentage=${percentage#0}
        printf "\x2d $MSG_IS_COPY" "$file2" "$file1" "$percentage"
        ;;
      A) # file1 was newly added
        check_duplicates "$file1"
        check_missing_english_page "$file1"
        check_outdated_page "$file1"
        check_more_info_link "$file1"
        check_page_title "$file1"
        ;;
      M) # file1 was modified
        check_missing_english_page "$file1"
        check_outdated_page "$file1"
        check_more_info_link "$file1"
        check_page_title "$file1"
        ;;
    esac
  done <<< "$git_diff"

  rm -f -- more-info-links.txt page-titles.txt
}

#######################################
# Recursively check the pages/ folder for anomalies: entries of pages/ that
# are not directories, and entries inside a platform directory that are not
# regular files or lack the '.md' extension.
# Globals:   PLATFORMS (read), MSG_NOT_DIR, MSG_NOT_FILE, MSG_NOT_MD (read)
# Outputs:   one result line on stdout per anomaly
#######################################
function check_structure {
  local platform page

  # PLATFORMS is a whitespace-separated list; word splitting is intended.
  for platform in $PLATFORMS; do
    if [[ ! -d "pages/$platform" ]]; then
      printf "\x2d $MSG_NOT_DIR" "pages/$platform"
      continue
    fi

    for page in "pages/$platform"/*; do
      if [[ ! -f $page ]]; then
        printf "\x2d $MSG_NOT_FILE" "$page"
      elif [[ $page != *.md ]]; then
        printf "\x2d $MSG_NOT_MD" "$page"
      fi
    done
  done
}

###################################
# MAIN
###################################

MSG_EXISTS='The page `%s` already exists in the `%s` directory.\n'
MSG_NOT_EXISTS='The page `%s` does not exists as English page `%s` yet.\n'
MSG_OUTDATED='The page `%s` is outdated, %s, compared to the English page.\n'
MSG_IS_COPY='The page `%s` seems to be a copy of `%s` (%d%% matching).\n'
MSG_NOT_DIR='The file `%s` does not look like a directory.\n'
MSG_NOT_FILE='The file `%s` does not look like a regular file.\n'
MSG_NOT_MD='The file `%s` does not have a `.md` extension.\n'
MSG_MORE_INFO='The page `%s` has a more info link that does not match the one in the English page. Please check the "More information:" translation as well using https://github.com/tldr-pages/tldr/blob/main/contributing-guides/translation-templates/more-info-link.md.\n'
MSG_PAGE_TITLE='The page `%s` has a page title that does not match the one in the English page.\n'

PLATFORMS=$(ls pages/)

#######################################
# Entry point: run the checks only for pull requests built by CI for the
# tldr-pages/tldr repository.
# Globals:   CI, GITHUB_REPOSITORY, PULL_REQUEST_ID (read)
# Returns:   0 when the checks are skipped (returning rather than exiting
#            keeps the exit status 0 and lets the file be sourced)
#######################################
function main {
  if [[ $CI == true && $GITHUB_REPOSITORY == "tldr-pages/tldr" && $PULL_REQUEST_ID != "" ]]; then
    check_diff
    check_structure
  else
    echo 'Not a pull request, refusing to run.' >&2
    return 0
  fi
}

main "$@"