check-pr.sh 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. #!/usr/bin/env bash
  2. # SPDX-License-Identifier: MIT
  3. # shellcheck disable=SC2016,SC2059
  4. # This script is executed by GitHub Actions for every pull request opened.
  5. # It currently accomplishes the following objectives:
  6. #
  7. # 1. Detect pages that were just copied (e.g. cp pages/{common,linux}/7z.md).
  8. # 2. Detect English pages that were added in a platform-specific directory although
  9. # they already exist under 'common'.
  10. # 3. Detect translated pages that do not exist as English pages yet.
  11. # 4. Detect outdated pages. A page is marked as outdated when the number of
  12. # commands differs from the number of commands in the English page or the
  13. # contents of the commands differ from the English page.
  14. # 5. Detect other miscellaneous anomalies in the pages folder.
  15. #
  16. # Results are printed to stdout, logs and errors to stderr.
  17. #
  18. # NOTE: must be run from the repository root directory to work correctly!
  19. # NOTE: no `set -e`, failure of this script should not invalidate the build.
  # Parse command-line options.
  #   -v  verbose mode: write a `set -x` execution trace to debug.log
  # Any other option is reported and otherwise ignored.
  VERBOSE=false
  while getopts ":v" opt; do
    case "$opt" in
      v)
        VERBOSE=true
        ;;
      *)
        # Diagnostics go to stderr: stdout is reserved for the check results.
        echo "This argument is not valid for this script." >&2
        ;;
    esac
  done

  if [[ $VERBOSE == true ]]; then
    DEBUG_LOG="debug.log"
    # Start from an empty log file.
    rm -f "$DEBUG_LOG" && touch "$DEBUG_LOG"
    # Let bash pick a free file descriptor for the log, store its number in
    # BASH_XTRACEFD, and thereby route the xtrace output into the log file.
    exec {BASH_XTRACEFD}> "$DEBUG_LOG"
    export BASH_XTRACEFD
    set -x
  fi
  # Report an English page that also exists under 'pages/common'.
  #
  # Globals:   MSG_EXISTS (read) - printf format taking the page path and a
  #            directory name
  # Arguments: $1 - page path in the format
  #                 'pages<.language_code>/platform/pagename.md'
  # Outputs:   one '- ...' line on stdout when 'pages/common/<pagename>' exists
  # Returns:   1 when the page is not located under 'pages/' (translations are
  #            not checked for duplicates)
  function check_duplicates {
    local page="$1"
    local segments
    # Split the path on '/': language folder, platform, file name.
    readarray -td'/' segments < <(echo -n "$page")

    # Only English pages are checked for duplicates.
    [[ ${segments[0]} == "pages" ]] || return 1

    local platform_dir="${segments[1]}"
    local page_name="${segments[2]}"

    # A page that lives in 'common' cannot duplicate a 'common' page.
    if [[ $platform_dir == "common" ]]; then
      return 0
    fi

    if [[ -f "pages/common/$page_name" ]]; then
      # \x2d is '-'; presumably escaped so the leading dash of the format is
      # not taken for a printf option.
      printf "\x2d $MSG_EXISTS" "$page" 'common'
    fi
  }
  # Report a translated page that has no English counterpart.
  #
  # Globals:   MSG_NOT_EXISTS (read) - printf format taking the page path and
  #            the expected English page path
  # Arguments: $1 - page path, e.g. 'pages.de/common/tar.md'
  # Outputs:   one '- ...' line on stdout when the English page is missing
  # Returns:   1 when the given page is itself the English page
  function check_missing_english_page() {
    local page="$1"
    # Drop the leading 'pages<.language_code>/' component and re-root the
    # remainder under 'pages/'.
    local relative_path="${page#pages*\/}"
    local english_page="pages/$relative_path"

    # An English page is its own reference; nothing to look up.
    [[ $page != "$english_page" ]] || return 1

    [[ -f $english_page ]] || printf "\x2d $MSG_NOT_EXISTS" "$page" "$english_page"
  }
  # Count the lines of a file that match a regular expression.
  #
  # Arguments: $1 - path of the file to scan
  #            $2 - grep basic regular expression selecting the lines to count
  # Outputs:   the number of matching lines on stdout
  # Returns:   grep's exit status (1 when nothing matched, >1 on error)
  function count_commands() {
    local file="$1"
    local regex="$2"

    # -e keeps a pattern starting with '-' from being parsed as an option;
    # '--' gives the file name the same protection.
    grep -c -e "$regex" -- "$file"
  }
  # Print the matching lines of a file with their variable parts removed, so
  # that two pages can be compared on the structure of their commands.
  #
  # Arguments: $1 - path of the file to scan
  #            $2 - grep basic regular expression selecting the lines to keep
  # Outputs:   a single line on stdout: all normalized matches joined with the
  #            first character of IFS (a space by default)
  function strip_commands() {
    local file="$1"
    local regex="$2"
    local stripped_commands=()

    # One sed process applies all substitutions in order:
    #   {{...}}  -> {{}}     (placeholder contents dropped)
    #   <...>    -> removed
    #   (...)    -> removed
    #   "..."    -> ""       (double-quoted contents dropped)
    #   '...'    -> removed
    #   `        -> removed
    # Presumably these are the spans translators are allowed to change.
    mapfile -t stripped_commands < <(
      grep -e "$regex" -- "$file" |
        sed \
          -e 's/{{[^}]*}}/{{}}/g' \
          -e 's/<[^>]*>//g' \
          -e 's/([^)]*)//g' \
          -e 's/"[^"]*"/""/g' \
          -e "s/'[^']*'//g" \
          -e 's/`//g'
    )

    # "${array[*]}" joins all elements into one word.
    printf "%s\n" "${stripped_commands[*]}"
  }
  # Report a translated page whose commands no longer match the English page.
  #
  # The page counts as outdated when the number of command lines differs, or
  # when the normalized command lines (see strip_commands) differ.
  #
  # Globals:   MSG_OUTDATED (read) - printf format taking the page path and a
  #            reason
  # Arguments: $1 - page path, e.g. 'pages.de/common/tar.md'
  # Outputs:   at most one '- ...' line on stdout
  # Returns:   1 when the page is the English page itself or the English page
  #            does not exist
  function check_outdated_page() {
    local page="$1"
    local reference="pages/${page#pages*\/}"
    # A command line consists solely of one backtick-delimited span.
    local command_regex='^`[^`]\+`$'

    # Nothing to compare against for English pages or missing references.
    [[ $page != "$reference" ]] || return 1
    [[ -f $reference ]] || return 1

    local reference_count translated_count reference_text translated_text
    reference_count="$(count_commands "$reference" "$command_regex")"
    reference_text="$(strip_commands "$reference" "$command_regex")"
    translated_count="$(count_commands "$page" "$command_regex")"
    translated_text="$(strip_commands "$page" "$command_regex")"

    # A differing count takes precedence over differing contents.
    if [[ $reference_count != "$translated_count" ]]; then
      printf "\x2d $MSG_OUTDATED" "$page" "based on number of commands"
      return
    fi

    if [[ $reference_text != "$translated_text" ]]; then
      printf "\x2d $MSG_OUTDATED" "$page" "based on the command contents itself"
    fi
  }
  # Report a page that is mentioned in 'more-info-links.txt'.
  #
  # The file is expected in the current directory; check_diff fills it with
  # the output of 'scripts/set-more-info-link.py -Sn' (presumably the pages
  # whose link would be changed - confirm against that script).
  #
  # Globals:   MSG_MORE_INFO (read) - printf format taking the page path
  # Arguments: $1 - page path, e.g. 'pages.de/common/tar.md'
  # Outputs:   one '- ...' line on stdout when the path occurs in the file
  function check_more_info_link() {
    local page=$1

    # -F: the path is a literal string, not a regex. Otherwise '.' matches any
    # character and a page such as 'pages.de/common/[.md' is an invalid
    # pattern. -e / -- guard against a leading '-'.
    if grep -qF -e "$page" -- "more-info-links.txt"; then
      printf "\x2d $MSG_MORE_INFO" "$page"
    fi
  }
  # Report a page that is mentioned in 'page-titles.txt'.
  #
  # The file is expected in the current directory; check_diff fills it with
  # the output of 'scripts/set-page-title.py -Sn' (presumably the pages whose
  # title would be changed - confirm against that script).
  #
  # Globals:   MSG_PAGE_TITLE (read) - printf format taking the page path
  # Arguments: $1 - page path, e.g. 'pages.de/common/tar.md'
  # Outputs:   one '- ...' line on stdout when the path occurs in the file
  function check_page_title() {
    local page=$1

    # -F: the path is a literal string, not a regex. Otherwise '.' matches any
    # character and a page such as 'pages.de/common/[.md' is an invalid
    # pattern. -e / -- guard against a leading '-'.
    if grep -qF -e "$page" -- "page-titles.txt"; then
      printf "\x2d $MSG_PAGE_TITLE" "$page"
    fi
  }
  # Inspect the pages changed relative to origin/main and run the per-page
  # checks on them.
  #
  # Only added (A), copied (C) and modified (M) files below the pages*/
  # directories are considered. While the checks run, two helper files
  # (more-info-links.txt, page-titles.txt) are created in the current
  # directory; they are removed again at the end.
  #
  # Globals:   MSG_IS_COPY (read) - printf format taking copy, source and the
  #            similarity percentage
  # Outputs:   findings as '- ...' lines on stdout, the raw diff on stderr
  # Returns:   0 when there is nothing to check, otherwise the status of the
  #            final cleanup
  function check_diff {
    local changes record fields
    local status first_path second_path similarity

    changes="$(git diff --name-status --find-copies-harder --diff-filter=ACM origin/main -- pages*/)"

    # Nothing relevant changed: log and stop early.
    if [[ -z $changes ]]; then
      echo 'Check PR: git diff looks fine, no interesting changes detected.' >&2
      return 0
    fi
    echo -e "Check PR: git diff:\n$changes" >&2

    # Lookup files consumed by check_more_info_link and check_page_title.
    python3 scripts/set-more-info-link.py -Sn > more-info-links.txt
    python3 scripts/set-page-title.py -Sn > page-titles.txt

    while read -r record; do
      # Each record is '<status>\t<path>[\t<path>]'.
      readarray -td$'\t' fields < <(echo -n "$record")
      status="${fields[0]}"
      first_path="${fields[1]}"
      second_path="${fields[2]}"

      case "$status" in
        C*) # second_path is a copy of first_path
          # The status is 'C' followed by the similarity; drop up to two
          # leading zeros, presumably so printf's %d does not read the value
          # as octal.
          similarity=${status#C}
          similarity=${similarity#0}
          similarity=${similarity#0}
          printf "\x2d $MSG_IS_COPY" "$second_path" "$first_path" "$similarity"
          ;;
        A | M) # first_path was newly added or modified
          # The duplicate check applies to newly added pages only.
          if [[ $status == A ]]; then
            check_duplicates "$first_path"
          fi
          check_missing_english_page "$first_path"
          check_outdated_page "$first_path"
          check_more_info_link "$first_path"
          check_page_title "$first_path"
          ;;
      esac
    done <<< "$changes"

    rm more-info-links.txt page-titles.txt
  }
  # Check the first level of every platform directory below pages/ for
  # anomalies: platforms that are not directories, entries that are not
  # regular files, and files without a '.md' extension.
  #
  # Globals:   PLATFORMS (read) - whitespace-separated entry names of pages/
  #            MSG_NOT_DIR, MSG_NOT_FILE, MSG_NOT_MD (read) - printf formats
  #            taking the offending path
  # Outputs:   one '- ...' line on stdout per anomaly
  function check_structure {
    # Keep the loop variables out of the global scope.
    local platform page

    # PLATFORMS is a plain string; it is left unquoted on purpose so that it
    # is split into one word per platform.
    for platform in $PLATFORMS; do
      if [[ ! -d "pages/$platform" ]]; then
        printf "\x2d $MSG_NOT_DIR" "pages/$platform"
        continue
      fi

      for page in "pages/$platform"/*; do
        # In an empty directory the glob stays unexpanded; skip that literal
        # pattern instead of reporting it. Dangling symlinks are still
        # reported below.
        [[ -e $page || -L $page ]] || continue

        if [[ ! -f $page ]]; then
          printf "\x2d $MSG_NOT_FILE" "$page"
        elif [[ $page != *.md ]]; then
          printf "\x2d $MSG_NOT_MD" "$page"
        fi
      done
    done
  }
  ###################################
  # MAIN
  ###################################

  # printf formats of the findings. The functions above splice them into the
  # printf format string, hence the SC2059 suppression at the top of the file.

  # Findings derived from the git diff.
  MSG_IS_COPY='The page `%s` seems to be a copy of `%s` (%d%% matching).\n'
  MSG_EXISTS='The page `%s` already exists in the `%s` directory.\n'
  # NOTE(review): "does not exists" is a typo; the text is emitted verbatim,
  # so it is left untouched here.
  MSG_NOT_EXISTS='The page `%s` does not exists as English page `%s` yet.\n'
  MSG_OUTDATED='The page `%s` is outdated, %s, compared to the English page.\n'
  MSG_MORE_INFO='The page `%s` has a more info link that does not match the one in the English page. Please check the "More information:" translation as well using https://github.com/tldr-pages/tldr/blob/main/contributing-guides/translation-templates/more-info-link.md.\n'
  MSG_PAGE_TITLE='The page `%s` has a page title that does not match the one in the English page.\n'

  # Findings derived from the directory structure.
  MSG_NOT_DIR='The file `%s` does not look like a directory.\n'
  MSG_NOT_FILE='The file `%s` does not look like a regular file.\n'
  MSG_NOT_MD='The file `%s` does not have a `.md` extension.\n'

  # Entry names directly below pages/; check_structure iterates over them
  # word by word.
  PLATFORMS=$(ls pages/)

  # Only run on CI, for the upstream repository, and with a pull request id.
  if [[ $CI != true || $GITHUB_REPOSITORY != "tldr-pages/tldr" || $PULL_REQUEST_ID == "" ]]; then
    echo 'Not a pull request, refusing to run.' >&2
    exit 0
  fi

  check_diff
  check_structure