render.py 3.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. #!/usr/bin/env python3
  2. # SPDX-License-Identifier: MIT
  3. """
A Python script to generate a single PDF document with all the `tldr` pages. It works by converting
the existing Markdown files into one intermediate HTML document (held in memory) using
Python-Markdown, applying the desired formatting through CSS, and finally rendering that document
as a PDF. There is no LaTeX dependency for generating the PDF.
  7. """
  8. import os
  9. import sys
  10. import glob
  11. import re
  12. import markdown
  13. import argparse
  14. from datetime import datetime
  15. from weasyprint import HTML
  16. def main(loc, colorscheme):
  17. # Checking correctness of path
  18. if not os.path.isdir(loc):
  19. print("Invalid directory. Please try again!", file=sys.stderr)
  20. sys.exit(1)
  21. # Set up css style sheets
  22. csslist = ["basic.css"]
  23. if colorscheme != "basic":
  24. csslist.append(colorscheme + ".css")
  25. # A string that stores all pages in HTML format
  26. html = (
  27. '<!doctype html><html><head><meta charset="utf-8"></head>'
  28. + "<body><h1 class=title-main>tldr pages</h1>"
  29. + "<h4 class=title-sub>Simplified and community-driven man pages</h4>"
  30. + "<h6 class=title-sub><em><small>Generated on "
  31. + datetime.now().strftime("%c")
  32. + "</small></em></h6>"
  33. + '<p style="page-break-before: always" ></p>'
  34. )
  35. # Writing names of all directories inside 'pages' to a list
  36. for operating_sys in sorted(os.listdir(loc)):
  37. # Required string to create directory title pages
  38. html += (
  39. "<h2 class=title-dir>"
  40. + operating_sys.capitalize()
  41. + "</h2>"
  42. + '<p style="page-break-before: always" ></p>'
  43. )
  44. # Conversion of Markdown to HTML string
  45. for page_number, md in enumerate(
  46. sorted(glob.glob(os.path.join(loc, operating_sys, "*.md"))), start=1
  47. ):
  48. with open(md, "r") as inp:
  49. text = inp.readlines()
  50. for line in text:
  51. if re.match(r"^>", line):
  52. line = line[:0] + "####" + line[1:]
  53. html += markdown.markdown(line)
  54. html += '<p style="page-break-before: always" ></p>'
  55. print(f"Rendered page {page_number} of the directory {operating_sys}")
  56. html += "</body></html>"
  57. # Writing the PDF to disk
  58. print("\nConverting all pages to PDF...")
  59. HTML(string=html).write_pdf("tldr-pages.pdf", stylesheets=csslist)
  60. if os.path.exists("tldr-pages.pdf"):
  61. print("\nCreated tldr-pages.pdf in the current directory!\n")
  62. if __name__ == "__main__":
  63. # Parsing the arguments
  64. parser = argparse.ArgumentParser(
  65. prog="tldr-pages-to-pdf",
  66. description="A Python script to generate a single PDF document with all the `tldr` pages.",
  67. )
  68. parser.add_argument("dir_path", help="Path to the 'pages' directory")
  69. parser.add_argument(
  70. "-c",
  71. "--color",
  72. choices=["solarized-light", "solarized-dark", "basic"],
  73. default="basic",
  74. help="Color scheme of the PDF",
  75. )
  76. args = parser.parse_args()
  77. main(args.dir_path, args.color)