issuu-dl 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. #!/usr/bin/env python3
  2. #
  3. # issuu-dl v0.5
  4. #
  5. # download documents - as PDFs or CBZs - from issuu.com
  6. #
  7. # tested (and working) on 05 Apr 2022
  8. # arch-deps: python-requests, python-tqdm, python-termcolor, img2pdf, typer
  9. import os
  10. import sys
  11. import shutil
  12. import json
  13. from enum import Enum
  14. import typer
  15. import requests
  16. import img2pdf
  17. from termcolor import colored
  18. from tqdm import tqdm
  19. class OutType(str, Enum):
  20. PDF = "pdf"
  21. CBZ = "cbz"
  22. def error_msg(msg: str):
  23. print(colored("[*] Error: {}".format(msg), "red"))
  24. sys.exit(1)
  25. class Downloader:
  26. def __init__(self):
  27. self.filenames = []
  28. self.temp_dir = ""
  29. self.target_doc = ""
  30. self.outfile = ""
  31. self.outfile_base = ""
  32. self.pages = ""
  33. self.request_session = requests.Session()
  34. def check_document_format(self, url, output_file):
  35. self.target_doc = url.split("/")
  36. try:
  37. self.target_doc = self.target_doc[3] + "/" + self.target_doc[5]
  38. except IndexError:
  39. error_msg("wrong URL format, please recheck your link")
  40. print(
  41. "[*] target_doc is '{}'".format(colored(self.target_doc, "yellow"))
  42. )
  43. self.outfile_base = self.target_doc.replace("/", "_")
  44. self.outfile = self.outfile_base + "." + output_file
  45. def download_metadata(self, url):
  46. print("[*] opening page...")
  47. request_session = requests.Session()
  48. try:
  49. request = request_session.get(url)
  50. except requests.exceptions.TooManyRedirects:
  51. error_msg("too many redirect requests made")
  52. try:
  53. assert request.status_code == 200
  54. except AssertionError:
  55. error_msg(
  56. "http status code received: {}".format(request.status_code)
  57. )
  58. print(colored("[+] url confirmed", "green"))
  59. assert "issuu-reader3-embed-files" in request.content.decode()
  60. print(colored("[+] reader3 support confirmed", "green"))
  61. print("[*] downloading reader3_4.json...")
  62. request = request_session.get(
  63. "https://reader3.isu.pub/{}/reader3_4.json".format(self.target_doc)
  64. )
  65. json_data = json.loads(request.content.decode())
  66. publication_id = json_data["document"]["publicationId"]
  67. revision_id = json_data["document"]["revisionId"]
  68. self.pages = json_data["document"]["pages"]
  69. print(
  70. colored("[+] fetched document data: ", "green")
  71. + colored(
  72. "publicationId is {}, revisionId is {}".format(
  73. publication_id, revision_id
  74. ),
  75. "yellow",
  76. )
  77. )
  78. print(colored("[+] found {} pages".format(len(self.pages)), "green"))
  79. def download_pages(self):
  80. print("[*] downloading pages...")
  81. self.filenames = []
  82. self.temp_dir = str(os.getpid()) + "-" + self.outfile_base
  83. try:
  84. os.mkdir(self.temp_dir)
  85. except (FileExistsError, PermissionError):
  86. error_msg(
  87. "could not create temporary directory '{}'".format(
  88. self.temp_dir
  89. )
  90. )
  91. for page in tqdm(self.pages):
  92. image_url = page["imageUri"].split("/")
  93. image_path = os.path.join(
  94. self.temp_dir, image_url[1] + "-" + image_url[3]
  95. )
  96. request = self.request_session.get("https://" + page["imageUri"])
  97. with open(image_path, "wb") as write_image:
  98. write_image.write(request.content)
  99. self.filenames.append(image_path)
  100. print(
  101. colored(
  102. "[+] downloaded {} jpg files".format(len(self.pages)), "green"
  103. )
  104. )
  105. def convert_files(self, output_file):
  106. if output_file == "pdf":
  107. print("[*] converting to single pdf...")
  108. with open(self.outfile, "wb") as out:
  109. out.write(img2pdf.convert(self.filenames))
  110. print(
  111. colored(
  112. "[+] output pdf '{}' is ready".format(
  113. colored(self.outfile, "yellow")
  114. ),
  115. "green",
  116. )
  117. )
  118. else:
  119. print("[*] converting to single cbz...")
  120. shutil.make_archive(self.outfile, "zip", self.temp_dir)
  121. os.rename(self.outfile + ".zip", self.outfile)
  122. print(
  123. colored(
  124. "[+] output cbz '{}' is ready".format(
  125. colored(self.outfile, "yellow")
  126. ),
  127. "green",
  128. )
  129. )
  130. def remove_tempfiles(self):
  131. print("[*] cleaning up temporary files...")
  132. for file in self.filenames:
  133. os.remove(file)
  134. os.rmdir(self.temp_dir)
  135. main = typer.Typer(add_completion=False)
  136. @main.command(no_args_is_help=True)
  137. def greeting(
  138. url: str = typer.Argument(..., help="Full issuu url"),
  139. output_file: OutType = typer.Option(
  140. "pdf", "--output-type", "-o", help="Whether output is a pdf or cbz."
  141. ),
  142. ):
  143. download = Downloader()
  144. download.check_document_format(url, output_file)
  145. download.download_metadata(url)
  146. download.download_pages()
  147. download.convert_files(output_file)
  148. download.remove_tempfiles()
  149. if __name__ == "__main__":
  150. main()