check-pr.sh 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. #!/usr/bin/env bash
  2. # SPDX-License-Identifier: MIT
  3. # This script is executed by GitHub Actions for every pull request opened.
  4. # It currently accomplishes the following objectives:
  5. #
  6. # 1. Detect pages that were just copied (i.e. cp pages/{common,linux}/7z.md).
  7. # 2. Detect English pages that were added in a platform specific directory although
  8. # they already exist under 'common'.
  # 3. Detect translated pages that do not exist as English pages yet.
  # 4. Detect outdated pages. A page is marked as outdated when the number of
  #    commands differs from the number of commands in the English page or the
  #    contents of the commands differ from the English page.
  # 5. Detect other miscellaneous anomalies in the pages folder.
  14. #
  15. # Results are printed to stdout, logs and errors to stderr.
  16. #
  # NOTE: must be run from the repository root directory to work correctly!
  18. # NOTE: no `set -e`, failure of this script should not invalidate the build.
  #######################################
  # Check for duplicated pages: report an English page that was added to a
  # platform directory although a page with the same file name already exists
  # under 'pages/common'.
  # Globals:   MSG_EXISTS (read) - printf template taking the page path and
  #            the name of the directory that already contains the page
  # Arguments: $1 - page path in the format
  #                 'pages<.language_code>/platform/pagename.md'
  # Outputs:   one '- ...' line on stdout when a duplicate is found
  # Returns:   1 when the page is not an English page, otherwise the status of
  #            the last executed command (0 in practice)
  #######################################
  function check_duplicates {
    local page=$1
    local parts=()

    # Split the path on '/' with the read builtin: no subshell, no `echo -n`,
    # and no dependency on `readarray -d` (bash >= 4.4).
    IFS='/' read -r -a parts <<< "$page"

    local language_folder=${parts[0]}
    if [[ "$language_folder" != "pages" ]]; then # only check for duplicates in English
      return 1
    fi

    local platform=${parts[1]}
    local file=${parts[2]}

    case "$platform" in
      common) # skip common-platform
        ;;
      *) # check if page already exists under common
        if [[ -f "pages/common/$file" ]]; then
          # MSG_EXISTS is a format template on purpose; '\x2d' emits the
          # leading dash without printf parsing it as an option.
          # shellcheck disable=SC2059
          printf "\x2d $MSG_EXISTS" "$page" 'common'
        fi
        ;;
    esac
  }
  #######################################
  # Report a translated page that has no English counterpart.
  # The English path is derived by replacing the leading 'pages<...>/'
  # component of the given path with 'pages/'.
  # Globals:   MSG_NOT_EXISTS (read) - printf template taking the page path
  #            and the expected English page path
  # Arguments: $1 - path of the page to check
  # Outputs:   one '- ...' line on stdout when the English page is missing
  # Returns:   1 when the given page is itself the English page
  #######################################
  function check_missing_english_page() {
    local page=$1
    local relative_path=${page#pages*/}
    local english_page="pages/${relative_path}"

    # English pages have nothing to be compared against.
    [[ "$page" != "$english_page" ]] || return 1

    # shellcheck disable=SC2059
    [[ -f "$english_page" ]] || printf "\x2d $MSG_NOT_EXISTS" "$page" "$english_page"
  }
  #######################################
  # Count the lines of a file that match a regular expression.
  # Arguments: $1 - file to inspect
  #            $2 - grep (basic) regular expression
  # Outputs:   the number of matching lines on stdout
  # Returns:   grep's exit status (1 when nothing matched)
  #######################################
  function count_commands() {
    local file="$1"
    local regex="$2"

    # '-e' keeps a pattern that starts with '-' from being parsed as an option;
    # '--' does the same for the file name.
    grep -c -e "$regex" -- "$file"
  }
  #######################################
  # Print the command lines of a page with everything that may legitimately
  # differ between translations removed, so that two pages can be compared.
  # Arguments: $1 - file to inspect
  #            $2 - grep (basic) regular expression selecting the command lines
  # Outputs:   all normalized command lines on a single stdout line, joined by
  #            the first character of IFS (a space by default)
  #######################################
  function strip_commands() {
    local file="$1"
    local regex="$2"
    local stripped_commands=()

    # A single sed process applies the substitutions in order:
    #   1. {{placeholder}} -> {{}}
    #   2. <...> removed
    #   3. (...) removed
    #   4. "..." -> ""
    #   5. '...' removed
    #   6. backticks removed
    mapfile -t stripped_commands < <(
      grep -e "$regex" -- "$file" |
        sed \
          -e 's/{{[^}]*}}/{{}}/g' \
          -e 's/<[^>]*>//g' \
          -e 's/([^)]*)//g' \
          -e 's/"[^"]*"/""/g' \
          -e "s/'[^']*'//g" \
          -e 's/`//g'
    )

    printf "%s\n" "${stripped_commands[*]}"
  }
  #######################################
  # Report a translated page whose commands are out of sync with its English
  # counterpart. The number of command lines is compared first; only when the
  # counts match are the normalized command contents compared.
  # Globals:   MSG_OUTDATED (read) - printf template taking the page path and
  #            the reason
  # Arguments: $1 - path of the page to check
  # Outputs:   one '- ...' line on stdout when the page is outdated
  # Returns:   1 when the page is the English page itself or has no English
  #            counterpart
  #######################################
  function check_outdated_page() {
    local page=$1
    local english_page="pages/${page#pages*\/}"
    # A command line is a line consisting of exactly one backtick-quoted span.
    local command_regex='^`[^`]\+`$'

    if [[ "$page" = "$english_page" ]] || [[ ! -f "$english_page" ]]; then
      return 1
    fi

    local english_commands
    english_commands=$(count_commands "$english_page" "$command_regex")
    local commands
    commands=$(count_commands "$page" "$command_regex")

    if [[ "$english_commands" != "$commands" ]]; then
      # shellcheck disable=SC2059
      printf "\x2d $MSG_OUTDATED" "$page" "based on number of commands"
      return
    fi

    # The content comparison is only needed when the counts agree, so the
    # normalization pipelines are not run for pages already known to be outdated.
    local english_commands_as_string
    english_commands_as_string=$(strip_commands "$english_page" "$command_regex")
    local commands_as_string
    commands_as_string=$(strip_commands "$page" "$command_regex")

    if [[ "$english_commands_as_string" != "$commands_as_string" ]]; then
      # shellcheck disable=SC2059
      printf "\x2d $MSG_OUTDATED" "$page" "based on the command contents itself"
    fi
  }
  #######################################
  # Report a page that is listed in 'more-info-links.txt' in the current
  # directory.
  # NOTE(review): presumably that file lists the pages whose "more info" link
  # would be rewritten by scripts/set-more-info-link.py - confirm against the
  # script.
  # Globals:   MSG_MORE_INFO (read) - printf template taking the page path
  # Arguments: $1 - path of the page to look up
  # Outputs:   one '- ...' line on stdout when the page is listed
  #######################################
  function check_more_info_link() {
    local page=$1

    # -F: the path is a literal string, not a regex ('.' must not match any
    # character); -q: only the exit status is of interest.
    if grep -qF -- "$page" "more-info-links.txt"; then
      # shellcheck disable=SC2059
      printf "\x2d $MSG_MORE_INFO" "$page"
    fi
  }
  #######################################
  # Report a page that is listed in 'page-titles.txt' in the current directory.
  # NOTE(review): presumably that file lists the pages whose title would be
  # rewritten by scripts/set-page-title.py - confirm against the script.
  # Globals:   MSG_PAGE_TITLE (read) - printf template taking the page path
  # Arguments: $1 - path of the page to look up
  # Outputs:   one '- ...' line on stdout when the page is listed
  #######################################
  function check_page_title() {
    local page=$1

    # -F: the path is a literal string, not a regex ('.' must not match any
    # character); -q: only the exit status is of interest.
    if grep -qF -- "$page" "page-titles.txt"; then
      # shellcheck disable=SC2059
      printf "\x2d $MSG_PAGE_TITLE" "$page"
    fi
  }
  #######################################
  # Look at git diff and check for copied/duplicated pages.
  # Compares the working tree against origin/main for added, copied and
  # modified files below pages*/ and runs the per-page checks on each entry.
  # Globals:   MSG_IS_COPY (read) - printf template taking the copy, the
  #            original and the similarity percentage
  # Outputs:   findings on stdout, the raw diff and status messages on stderr
  # Side effects: writes 'more-info-links.txt' and 'page-titles.txt' into the
  #            current directory (read by check_more_info_link and
  #            check_page_title)
  # Returns:   0 when the diff is empty, otherwise the status of the last check
  #######################################
  function check_diff {
    local git_diff
    local change
    local file1
    local file2
    local percentage

    git_diff=$(git diff --name-status --find-copies-harder --diff-filter=ACM origin/main -- pages*/)

    if [[ -z $git_diff ]]; then
      echo 'Check PR: git diff looks fine, no interesting changes detected.' >&2
      return 0
    fi
    # printf instead of `echo -e`: backslashes in the diff are printed verbatim.
    printf 'Check PR: git diff:\n%s\n' "$git_diff" >&2

    python3 scripts/set-more-info-link.py -Sn > more-info-links.txt
    python3 scripts/set-page-title.py -Sn > page-titles.txt

    # Each line is '<status>\t<file1>[\t<file2>]'. Split on tabs with the read
    # builtin; -r keeps backslashes in path names intact.
    while IFS=$'\t' read -r change file1 file2; do
      case "$change" in
        C*) # file2 is a copy of file1
          # The status is 'C' followed by a zero-padded score (e.g. C075).
          # Strip up to two leading zeros so %d does not read it as octal.
          percentage=${change#C}
          percentage=${percentage#0}
          percentage=${percentage#0}
          # shellcheck disable=SC2059
          printf "\x2d $MSG_IS_COPY" "$file2" "$file1" "$percentage"
          ;;
        A) # file1 was newly added
          check_duplicates "$file1"
          check_missing_english_page "$file1"
          check_outdated_page "$file1"
          check_more_info_link "$file1"
          check_page_title "$file1"
          ;;
        M) # file1 was modified
          check_missing_english_page "$file1"
          check_outdated_page "$file1"
          check_more_info_link "$file1"
          check_page_title "$file1"
          ;;
      esac
    done <<< "$git_diff"
  }
  #######################################
  # Check the pages/ folder for anomalies: entries of pages/ that are not
  # directories, and entries of a platform directory that are not regular
  # files or do not end in '.md'.
  # Globals:   PLATFORMS (read) - whitespace separated names of the entries
  #            of pages/
  #            MSG_NOT_DIR, MSG_NOT_FILE, MSG_NOT_MD (read) - printf templates
  #            taking the offending path
  # Outputs:   one '- ...' line on stdout per anomaly
  #######################################
  function check_structure {
    # Keep the loop variables out of the global scope.
    local platform
    local page

    # PLATFORMS is a whitespace separated list, word splitting is intended.
    # shellcheck disable=SC2086
    for platform in $PLATFORMS; do
      if [[ ! -d "pages/$platform" ]]; then
        # shellcheck disable=SC2059
        printf "\x2d $MSG_NOT_DIR" "pages/$platform"
        continue
      fi

      for page in "pages/$platform"/*; do
        # In an empty directory the glob stays unexpanded; skip that literal
        # pattern instead of reporting it. Dangling symlinks still exist as
        # directory entries (-L) and are reported below.
        [[ -e $page || -L $page ]] || continue

        if [[ ! -f $page ]]; then
          # shellcheck disable=SC2059
          printf "\x2d $MSG_NOT_FILE" "$page"
        elif [[ ${page:(-3)} != ".md" ]]; then
          # shellcheck disable=SC2059
          printf "\x2d $MSG_NOT_MD" "$page"
        fi
      done
    done
  }
  ###################################
  # MAIN
  ###################################

  # printf templates for the findings. Each check prepends '- ' and fills in
  # the %s / %d placeholders.
  MSG_EXISTS='The page `%s` already exists in the `%s` directory.\n'
  MSG_NOT_EXISTS='The page `%s` does not exist as English page `%s` yet.\n'
  MSG_OUTDATED='The page `%s` is outdated, %s.\n'
  MSG_IS_COPY='The page `%s` seems to be a copy of `%s` (%d%% matching).\n'
  MSG_NOT_DIR='The file `%s` does not look like a directory.\n'
  MSG_NOT_FILE='The file `%s` does not look like a regular file.\n'
  MSG_NOT_MD='The file `%s` does not have a `.md` extension.\n'
  MSG_MORE_INFO='The page `%s` has an outdated more info link.\n'
  MSG_PAGE_TITLE='The page `%s` has an outdated page title.\n'

  # Whitespace separated names of all entries of pages/ (read by
  # check_structure). Collected with a glob instead of parsing `ls` output.
  PLATFORMS=''
  for platform_path in pages/*; do
    # An unmatched glob stays literal; skip it.
    [[ -e $platform_path || -L $platform_path ]] || continue
    PLATFORMS+="${platform_path##*/} "
  done
  unset platform_path

  # Only run for pull requests of the upstream repository on CI.
  if [[ $CI == true && $GITHUB_REPOSITORY == "tldr-pages/tldr" && $PULL_REQUEST_ID != "" ]]; then
    check_diff
    check_structure
  else
    echo 'Not a pull request, refusing to run.' >&2
    exit 0
  fi