#!/usr/bin/env bash
# SPDX-License-Identifier: MIT

# This script is executed by GitHub Actions for every pull request opened.
# It currently accomplishes the following objectives:
#
#  1. Detect pages that were just copied (e.g. cp pages/{common,linux}/7z.md).
#  2. Detect English pages that were added in a platform specific directory although
#     they already exist under 'common'.
#  3. Detect translated pages that do not exist as English pages yet.
#  4. Detect outdated pages. A page is marked as outdated when the number of
#     commands differs from the number of commands in the English page or the
#     contents of the commands differ from the English page.
#  5. Detect other miscellaneous anomalies in the pages folder.
#
# Results are printed to stdout, logs and errors to stderr.
#
# NOTE: must be run from the repository root directory to work correctly!
# NOTE: no `set -e`, failure of this script should not invalidate the build.

  19. # Check for duplicated pages.
  20. function check_duplicates {
  21. local page=$1 # page path in the format 'pages<.language_code>/platform/pagename.md'
  22. local parts
  23. local other
  24. readarray -td'/' parts < <(echo -n "$page")
  25. local language_folder=${parts[0]}
  26. if [[ "$language_folder" != "pages" ]]; then # only check for duplicates in English
  27. return 1
  28. fi
  29. local platform=${parts[1]}
  30. local file=${parts[2]}
  31. case "$platform" in
  32. common) # skip common-platform
  33. ;;
  34. *) # check if page already exists under common
  35. if [[ -f "pages/common/$file" ]]; then
  36. printf "\x2d $MSG_EXISTS" "$page" 'common'
  37. fi
  38. ;;
  39. esac
  40. }
  41. function check_missing_english_page() {
  42. local page=$1
  43. local english_page="pages/${page#pages*\/}"
  44. if [[ "$page" = "$english_page" ]]; then
  45. return 1
  46. fi
  47. if [[ ! -f "$english_page" ]]; then
  48. printf "\x2d $MSG_NOT_EXISTS" "$page" "$english_page"
  49. fi
  50. }
  51. function count_commands() {
  52. local file="$1"
  53. local regex="$2"
  54. grep -c "$regex" "$file"
  55. }
  56. function strip_commands() {
  57. local file="$1"
  58. local regex="$2"
  59. local stripped_commands=()
  60. mapfile -t stripped_commands < <(
  61. grep "$regex" "$file" |
  62. sed 's/{{[^}]*}}/{{}}/g' |
  63. sed 's/<[^>]*>//g' |
  64. sed 's/([^)]*)//g' |
  65. sed 's/"[^"]*"/""/g' |
  66. sed "s/'[^']*'//g" |
  67. sed 's/`//g'
  68. )
  69. printf "%s\n" "${stripped_commands[*]}"
  70. }
  71. function check_outdated_page() {
  72. local page=$1
  73. local english_page="pages/${page#pages*\/}"
  74. local command_regex='^`[^`]\+`$'
  75. if [[ "$page" = "$english_page" ]] || [[ ! -f "$english_page" ]]; then
  76. return 1
  77. fi
  78. local english_commands
  79. english_commands=$(count_commands "$english_page" "$command_regex")
  80. local commands
  81. commands=$(count_commands "$page" "$command_regex")
  82. local english_commands_as_string
  83. english_commands_as_string=$(strip_commands "$english_page" "$command_regex")
  84. local commands_as_string
  85. commands_as_string=$(strip_commands "$page" "$command_regex")
  86. if [[ "$english_commands" != "$commands" ]]; then
  87. printf "\x2d $MSG_OUTDATED" "$page" "based on number of commands"
  88. elif [[ "$english_commands_as_string" != "$commands_as_string" ]]; then
  89. printf "\x2d $MSG_OUTDATED" "$page" "based on the command contents itself"
  90. fi
  91. }
  92. # Look at git diff and check for copied/duplicated pages.
  93. function check_diff {
  94. local git_diff
  95. local line
  96. local entry
  97. git_diff=$(git diff --name-status --find-copies-harder --diff-filter=ACM origin/main -- pages*/)
  98. if [[ -n $git_diff ]]; then
  99. echo -e "Check PR: git diff:\n$git_diff" >&2
  100. else
  101. echo 'Check PR: git diff looks fine, no interesting changes detected.' >&2
  102. return 0
  103. fi
  104. while read line; do
  105. readarray -td$'\t' entry < <(echo -n "$line")
  106. local change="${entry[0]}"
  107. local file1="${entry[1]}"
  108. local file2="${entry[2]}"
  109. case "$change" in
  110. C*) # file2 is a copy of file1
  111. local percentage=${change#C}
  112. percentage=${percentage#0}
  113. percentage=${percentage#0}
  114. printf "\x2d $MSG_IS_COPY" "$file2" "$file1" "$percentage"
  115. ;;
  116. A) # file1 was newly added
  117. check_duplicates "$file1"
  118. check_missing_english_page "$file1"
  119. check_outdated_page "$file1"
  120. ;;
  121. M) # file1 was modified
  122. check_missing_english_page "$file1"
  123. check_outdated_page "$file1"
  124. ;;
  125. esac
  126. done <<< "$git_diff"
  127. }
  128. # Recursively check the pages/ folder for anomalies.
  129. function check_structure {
  130. for platform in $PLATFORMS; do
  131. if [[ ! -d "pages/$platform" ]]; then
  132. printf "\x2d $MSG_NOT_DIR" "pages/$platform"
  133. else
  134. for page in "pages/$platform"/*; do
  135. if [[ ! -f $page ]]; then
  136. printf "\x2d $MSG_NOT_FILE" "$page"
  137. elif [[ ${page:(-3)} != ".md" ]]; then
  138. printf "\x2d $MSG_NOT_MD" "$page"
  139. fi
  140. done
  141. fi
  142. done
  143. }
  144. ###################################
  145. # MAIN
  146. ###################################
  147. MSG_EXISTS='The page `%s` already exists in the `%s` directory.\n'
  148. MSG_NOT_EXISTS='The page `%s` does not exists as English page `%s` yet.\n'
  149. MSG_OUTDATED='The page `%s` is outdated, %s.\n'
  150. MSG_IS_COPY='The page `%s` seems to be a copy of `%s` (%d%% matching).\n'
  151. MSG_NOT_DIR='The file `%s` does not look like a directory.\n'
  152. MSG_NOT_FILE='The file `%s` does not look like a regular file.\n'
  153. MSG_NOT_MD='The file `%s` does not have a `.md` extension.\n'
  154. PLATFORMS=$(ls pages/)
  155. if [[ $CI == true && $GITHUB_REPOSITORY == "tldr-pages/tldr" && $PULL_REQUEST_ID != "" ]]; then
  156. check_diff
  157. check_structure
  158. else
  159. echo 'Not a pull request, refusing to run.' >&2
  160. exit 0
  161. fi