check-pr.sh 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. #!/usr/bin/env bash
  2. # SPDX-License-Identifier: MIT
  3. # shellcheck disable=SC2016,SC2059
  4. # This script is executed by GitHub Actions for every pull request opened.
  5. # It currently accomplishes the following objectives:
  6. #
  7. # 1. Detect pages that were just copied (i.e. cp pages/{common,linux}/7z.md).
  8. # 2. Detect English pages that were added in a platform specific directory although
  9. # they already exist under 'common'.
  10. # 3. Detect translated pages that do not exist as English pages yet.
  11. # 4. Detect outdated pages. A page is marked as outdated when the number of
  12. # commands differs from the number of commands in the English page or the
  13. # contents of the commands differ from the English page.
  14. # 5. Detect other miscellaneous anomalies in the pages folder.
  15. #
  16. # Results are printed to stdout, logs and errors to stderr.
  17. #
  18. # NOTE: must be run from the repository root directory to correctly work!
  19. # NOTE: no `set -e`, failure of this script should not invalidate the build.
  # Report English pages whose file name already exists under pages/common.
  #
  # Arguments: $1 - page path in the format 'pages<.language_code>/platform/pagename.md'
  # Globals:   MSG_EXISTS (read) - printf format used for the report line
  # Outputs:   one '- ...' line on stdout when pages/common holds a file of the same name
  # Returns:   1 when the page is not in the English 'pages' folder, 0 otherwise
  function check_duplicates {
    local page="$1"
    local segments

    # Split the path on '/' into: language folder, platform, file name.
    IFS='/' read -r -a segments <<< "$page"

    # Duplicates are only looked for in the English tree.
    [[ ${segments[0]} == "pages" ]] || return 1

    local platform="${segments[1]}"
    local file="${segments[2]}"

    # A page that already lives in 'common' is never reported.
    if [[ $platform != "common" && -f "pages/common/$file" ]]; then
      printf "\x2d $MSG_EXISTS" "$page" 'common'
    fi
  }
  # Report translated pages that have no English counterpart.
  #
  # Arguments: $1 - page path, e.g. 'pages.de/common/tar.md'
  # Globals:   MSG_NOT_EXISTS (read) - printf format used for the report line
  # Outputs:   one '- ...' line on stdout when the English page file is missing
  # Returns:   1 when $1 already is the English page path, 0 otherwise
  function check_missing_english_page() {
    local page="$1"
    # Drop the leading 'pages<.language_code>/' component and re-root under 'pages/'.
    local relative_path="${page#pages*\/}"
    local english_page="pages/$relative_path"

    # An English page cannot be missing its own English version.
    [[ $page != "$english_page" ]] || return 1

    if [[ -f $english_page ]]; then
      return 0
    fi
    printf "\x2d $MSG_NOT_EXISTS" "$page" "$english_page"
  }
  # Count the lines of a file that match a regular expression.
  #
  # Arguments: $1 - file to scan
  #            $2 - grep basic regular expression
  # Outputs:   the number of matching lines on stdout
  # Returns:   grep's exit status (non-zero when nothing matched or on error)
  function count_commands() {
    local file="$1"
    local regex="$2"

    # '-e' and '--' keep a pattern or file name that starts with '-' from
    # being parsed as a grep option.
    grep -c -e "$regex" -- "$file"
  }
  # Print the matching lines of a file in a normalized form.
  #
  # Every line matching the regex is reduced as follows, in this order:
  #   1. '{{...}}' placeholders become '{{}}'
  #   2. '<...>' spans are removed
  #   3. '(...)' spans are removed
  #   4. double-quoted strings become '""'
  #   5. single-quoted strings are removed
  #   6. backticks are removed
  # The normalized lines are then joined with the first character of IFS
  # (a space by default) and printed as one line.
  #
  # Arguments: $1 - file to scan
  #            $2 - grep basic regular expression selecting the lines
  # Outputs:   a single line on stdout (empty when nothing matched)
  function strip_commands() {
    local file="$1"
    local regex="$2"
    local stripped_commands=()

    # One sed process applies all substitutions sequentially to each line;
    # '-e' and '--' protect grep from patterns/file names starting with '-'.
    mapfile -t stripped_commands < <(
      grep -e "$regex" -- "$file" |
        sed \
          -e 's/{{[^}]*}}/{{}}/g' \
          -e 's/<[^>]*>//g' \
          -e 's/([^)]*)//g' \
          -e 's/"[^"]*"/""/g' \
          -e "s/'[^']*'//g" \
          -e 's/`//g'
    )

    printf "%s\n" "${stripped_commands[*]}"
  }
  # Report translated pages whose commands no longer match the English page.
  #
  # A page is reported when its number of command lines differs from the
  # English page, or, if the counts agree, when the normalized command text
  # produced by strip_commands differs.
  #
  # Arguments: $1 - page path, e.g. 'pages.de/common/tar.md'
  # Globals:   MSG_OUTDATED (read) - printf format used for the report line
  # Outputs:   at most one '- ...' line on stdout
  # Returns:   1 when $1 is an English page or its English page is missing,
  #            0 otherwise
  function check_outdated_page() {
    local page="$1"
    local english_page="pages/${page#pages*\/}"
    # A command line is a whole line wrapped in a single pair of backticks.
    local command_regex='^`[^`]\+`$'

    # Nothing to compare against for English pages or orphan translations.
    [[ $page != "$english_page" && -f $english_page ]] || return 1

    local english_count translated_count english_text translated_text
    english_count="$(count_commands "$english_page" "$command_regex")"
    translated_count="$(count_commands "$page" "$command_regex")"
    english_text="$(strip_commands "$english_page" "$command_regex")"
    translated_text="$(strip_commands "$page" "$command_regex")"

    # The count mismatch takes precedence over the content mismatch.
    local reason=''
    if [[ $english_count != "$translated_count" ]]; then
      reason="based on number of commands"
    elif [[ "$english_text" != "$translated_text" ]]; then
      reason="based on the command contents itself"
    fi

    if [[ -n $reason ]]; then
      printf "\x2d $MSG_OUTDATED" "$page" "$reason"
    fi
  }
  # Report a page that is listed in 'more-info-links.txt'.
  #
  # The list file is expected in the current directory (check_diff writes it
  # before calling this function).
  #
  # Arguments: $1 - page path to look up
  # Globals:   MSG_MORE_INFO (read) - printf format used for the report line
  # Outputs:   one '- ...' line on stdout when the path occurs in the list
  # Returns:   0
  function check_more_info_link() {
    local page=$1

    # The path is matched as a fixed string: as a regex, '.' would match any
    # character and page names such as '[.md' would be invalid patterns.
    # '--' protects against a path that starts with '-'.
    if grep -qF -- "$page" "more-info-links.txt"; then
      printf "\x2d $MSG_MORE_INFO" "$page"
    fi
  }
  # Report a page that is listed in 'page-titles.txt'.
  #
  # The list file is expected in the current directory (check_diff writes it
  # before calling this function).
  #
  # Arguments: $1 - page path to look up
  # Globals:   MSG_PAGE_TITLE (read) - printf format used for the report line
  # Outputs:   one '- ...' line on stdout when the path occurs in the list
  # Returns:   0
  function check_page_title() {
    local page=$1

    # The path is matched as a fixed string: as a regex, '.' would match any
    # character and page names such as '[.md' would be invalid patterns.
    # '--' protects against a path that starts with '-'.
    if grep -qF -- "$page" "page-titles.txt"; then
      printf "\x2d $MSG_PAGE_TITLE" "$page"
    fi
  }
  # Inspect the git diff against origin/main and run the per-page checks.
  #
  # Added (A), copied (C) and modified (M) paths below pages*/ are listed with
  # 'git diff --name-status'. Copies are reported directly; added and modified
  # pages are handed to the individual check functions.
  #
  # Globals:   MSG_IS_COPY (read) - printf format for copied pages
  # Outputs:   report lines on stdout, the raw diff (or a note that there is
  #            nothing to check) on stderr
  # Files:     writes 'more-info-links.txt' and 'page-titles.txt' into the
  #            current directory and removes them again at the end
  # Returns:   0 when the diff is empty, otherwise the status of the final rm
  function check_diff {
    local git_diff line entry
    local change file1 file2 percentage

    git_diff="$(git diff --name-status --find-copies-harder --diff-filter=ACM origin/main -- pages*/)"

    # Nothing relevant changed: log it and stop early.
    if [[ -z $git_diff ]]; then
      echo 'Check PR: git diff looks fine, no interesting changes detected.' >&2
      return 0
    fi
    echo -e "Check PR: git diff:\n$git_diff" >&2

    # The output of both helper scripts is captured into the files that
    # check_more_info_link and check_page_title search.
    python3 scripts/set-more-info-link.py -Sn > more-info-links.txt
    python3 scripts/set-page-title.py -Sn > page-titles.txt

    while read -r line; do
      # Each diff line is '<status>TAB<file1>[TAB<file2>]'.
      readarray -td$'\t' entry < <(echo -n "$line")
      change="${entry[0]}"
      file1="${entry[1]}"
      file2="${entry[2]}"

      case "$change" in
        C*) # file2 is a copy of file1
          # The status looks like 'C075'; strip the 'C' and up to two leading zeros.
          percentage=${change#C}
          percentage=${percentage#0}
          percentage=${percentage#0}
          printf "\x2d $MSG_IS_COPY" "$file2" "$file1" "$percentage"
          ;;
        A | M) # file1 was newly added (A) or modified (M)
          # Only newly added pages are checked for duplicates.
          if [[ $change == "A" ]]; then
            check_duplicates "$file1"
          fi
          check_missing_english_page "$file1"
          check_outdated_page "$file1"
          check_more_info_link "$file1"
          check_page_title "$file1"
          ;;
      esac
    done <<< "$git_diff"

    rm more-info-links.txt page-titles.txt
  }
  # Check the pages/ folder for structural anomalies.
  #
  # Every entry named in PLATFORMS must be a directory below pages/, and every
  # entry inside such a directory must be a regular file ending in '.md'.
  #
  # Globals:   PLATFORMS (read)    - whitespace-separated entry names of pages/
  #            MSG_NOT_DIR (read)  - printf format for non-directories
  #            MSG_NOT_FILE (read) - printf format for non-regular files
  #            MSG_NOT_MD (read)   - printf format for files without '.md'
  # Outputs:   one '- ...' line on stdout per anomaly
  # Returns:   0
  function check_structure {
    # Keep the loop variables out of the global scope.
    local platform page

    # PLATFORMS is a whitespace-separated list, so word splitting is intended.
    for platform in $PLATFORMS; do
      if [[ ! -d "pages/$platform" ]]; then
        printf "\x2d $MSG_NOT_DIR" "pages/$platform"
        continue
      fi

      for page in "pages/$platform"/*; do
        # In an empty directory the glob stays unexpanded; skip the literal
        # pattern instead of reporting it as a non-regular file.
        [[ -e $page || -L $page ]] || continue

        if [[ ! -f $page ]]; then
          printf "\x2d $MSG_NOT_FILE" "$page"
        elif [[ $page != *.md ]]; then
          printf "\x2d $MSG_NOT_MD" "$page"
        fi
      done
    done
  }
  ###################################
  # MAIN
  ###################################

  # printf format strings for the report lines; each check function prefixes
  # them with '- ' when printing.
  readonly MSG_EXISTS='The page `%s` already exists in the `%s` directory.\n'
  readonly MSG_NOT_EXISTS='The page `%s` does not exists as English page `%s` yet.\n'
  readonly MSG_OUTDATED='The page `%s` is outdated, %s, compared to the English page.\n'
  readonly MSG_IS_COPY='The page `%s` seems to be a copy of `%s` (%d%% matching).\n'
  readonly MSG_NOT_DIR='The file `%s` does not look like a directory.\n'
  readonly MSG_NOT_FILE='The file `%s` does not look like a regular file.\n'
  readonly MSG_NOT_MD='The file `%s` does not have a `.md` extension.\n'
  readonly MSG_MORE_INFO='The page `%s` has a more info link that does not match the one in the English page. Please check the "More information:" translation as well using https://github.com/tldr-pages/tldr/blob/main/contributing-guides/translation-templates/more-info-link.md.\n'
  readonly MSG_PAGE_TITLE='The page `%s` has a page title that does not match the one in the English page.\n'

  # Only run for pull requests against the tldr-pages/tldr repository on CI.
  if [[ $CI == true && $GITHUB_REPOSITORY == "tldr-pages/tldr" && $PULL_REQUEST_ID != "" ]]; then
    # Collect the entry names of pages/ (whitespace-separated, as consumed by
    # check_structure) with a glob instead of parsing the output of `ls`.
    PLATFORMS=''
    for platform_entry in pages/*; do
      # Skip the unexpanded pattern when pages/ is missing or empty.
      [[ -e $platform_entry || -L $platform_entry ]] || continue
      PLATFORMS+="${platform_entry#pages/} "
    done
    unset platform_entry

    check_diff
    check_structure
  else
    echo 'Not a pull request, refusing to run.' >&2
    exit 0
  fi