|
@@ -1,113 +1,148 @@
|
|
|
#!/usr/bin/env python3
|
|
|
#
|
|
|
-# issuu-dl v0.4
|
|
|
+# issuu-dl v0.5
|
|
|
#
|
|
|
-# download pdf from issuu.com
|
|
|
+# download documents - as PDFs or CBZs - from issuu.com
|
|
|
#
|
|
|
# tested (and working) on 05 Apr 2022
|
|
|
|
|
|
-# arch-deps: python-tqdm, python-termcolor, img2pdf, python-validators, typer
|
|
|
+# arch-deps: python-tqdm, python-termcolor, img2pdf, typer
|
|
|
|
|
|
-import typer
|
|
|
-import requests
|
|
|
+import os
|
|
|
import sys
|
|
|
+import shutil
|
|
|
import json
|
|
|
+from enum import Enum
|
|
|
+
|
|
|
+import typer
|
|
|
+import requests
|
|
|
import img2pdf
|
|
|
-import os
|
|
|
-import shutil
|
|
|
from termcolor import colored
|
|
|
from tqdm import tqdm
|
|
|
-from enum import Enum
|
|
|
-app = typer.Typer(add_completion=False)
|
|
|
-
|
|
|
-
|
|
|
-class outType(str, Enum):
|
|
|
- pdf = 'pdf'
|
|
|
- cbz = 'cbz'
|
|
|
|
|
|
-@app.command(no_args_is_help=True)
|
|
|
-def greeting(url: str= typer.Argument(..., help='Full issuu url'), output_file: outType = typer.Option('pdf', "--output-type", "-o", help='Whether output is a pdf or cbz.')):
|
|
|
|
|
|
- doc = url.split('/')
|
|
|
- try:
|
|
|
- doc = doc[3] + '/' + doc[5]
|
|
|
- except IndexError:
|
|
|
- errorMsg()
|
|
|
- print(doc)
|
|
|
- print("[*] doc is '{}'".format(colored(doc, 'yellow')))
|
|
|
- outfile_prefix = doc.replace('/', '_')
|
|
|
- outfile = outfile_prefix + "." + output_file
|
|
|
- pid = str(os.getpid())
|
|
|
class OutType(str, Enum):
    """Supported output container formats for a downloaded document."""
    PDF = 'pdf'
    CBZ = 'cbz'
|
|
|
+
|
|
|
+
|
|
|
def error_msg(msg: str):
    """Report a fatal error in red and terminate with exit status 1."""
    text = "[*] Error: {}".format(msg)
    print(colored(text, "red"))
    sys.exit(1)
|
|
|
|
|
|
- print("[*] opening page...")
|
|
|
- s = requests.Session()
|
|
|
|
|
|
- try:
|
|
|
- r = s.get(url)
|
|
|
- except requests.exceptions.TooManyRedirects:
|
|
|
- errorMsg()
|
|
|
- try:
|
|
|
- assert r.status_code == 200
|
|
|
- except AssertionError:
|
|
|
- errorMsg()
|
|
|
- print(colored("[+] url confirmed", "green"))
|
|
|
- assert 'issuu-reader3-embed-files' in r.content.decode()
|
|
|
- print(colored("[+] reader3 support confirmed", "green"))
|
|
|
class Downloader():
    """Fetch an issuu.com document page-by-page and bundle it into a pdf or cbz.

    Typical call order: check_document_format() -> download_metadata() ->
    download_pages() -> convert_files() -> remove_tempfiles().
    All fatal conditions are reported via error_msg(), which exits the process.
    """

    def __init__(self):
        # Paths of the page jpgs downloaded so far, in page order.
        self.filenames = []
        # Scratch directory holding the page jpgs; removed after conversion.
        self.temp_dir = ""
        # "<user>/<docname>" fragment extracted from the issuu URL.
        self.target_doc = ""
        # Final output file name, e.g. "user_doc.pdf".
        self.outfile = ""
        # Output name without extension; also used for the temp dir name.
        self.outfile_base = ""
        # Page metadata entries from reader3_4.json (fix: was "", but this
        # always holds a list once download_metadata() has run).
        self.pages = []
        # One HTTP session reused for every request against issuu.
        self.request_session = requests.Session()

    def check_document_format(self, url, output_file):
        """Derive the document id and output file name from *url*.

        Exits via error_msg() when the URL does not have the expected
        https://issuu.com/<user>/docs/<docname> shape.
        """
        parts = url.split('/')
        try:
            self.target_doc = parts[3] + '/' + parts[5]
        except IndexError:
            error_msg("wrong URL format, please recheck your link")
        print("[*] target_doc is '{}'".format(colored(self.target_doc, 'yellow')))
        self.outfile_base = self.target_doc.replace('/', '_')
        self.outfile = self.outfile_base + "." + output_file

    def download_metadata(self, url):
        """Open the document page, verify it, and fetch the page metadata JSON."""
        print("[*] opening page...")
        try:
            # Fix: reuse the shared session instead of building a second,
            # throwaway requests.Session() here (download_pages already
            # uses self.request_session).
            request = self.request_session.get(url)
        except requests.exceptions.TooManyRedirects:
            error_msg("too many redirect requests made")
        # Fix: explicit checks instead of `try: assert ... except
        # AssertionError:` — asserts are stripped under -O and the
        # wrapped-assert pattern obscured the intent.
        if request.status_code != 200:
            error_msg("http status code received: {}".format(request.status_code))
        print(colored("[+] url confirmed", "green"))
        if 'issuu-reader3-embed-files' not in request.content.decode():
            # Fix: was a bare assert that died with a raw traceback.
            error_msg("reader3 is not supported for this document")
        print(colored("[+] reader3 support confirmed", "green"))

        print("[*] downloading reader3_4.json...")
        request = self.request_session.get(
            "https://reader3.isu.pub/{}/reader3_4.json".format(self.target_doc))
        json_data = json.loads(request.content.decode())

        publication_id = json_data["document"]["publicationId"]
        revision_id = json_data["document"]["revisionId"]
        self.pages = json_data["document"]["pages"]

        print(colored("[+] fetched document data: ", "green") +
              colored("publicationId is {}, revisionId is {}".format(
                  publication_id, revision_id), "yellow"))
        print(colored("[+] found {} pages".format(len(self.pages)), "green"))

    def download_pages(self):
        """Download every page jpg into a fresh temporary directory."""
        print("[*] downloading pages...")

        self.filenames = []
        # PID prefix keeps concurrent runs from clashing on the same document.
        self.temp_dir = str(os.getpid()) + "-" + self.outfile_base

        try:
            os.mkdir(self.temp_dir)
        except (FileExistsError, PermissionError):
            error_msg("could not create temporary directory '{}'".format(self.temp_dir))
        for page in tqdm(self.pages):
            image_url = page["imageUri"].split('/')
            image_path = os.path.join(self.temp_dir, image_url[1] + "-" + image_url[3])
            request = self.request_session.get("https://" + page["imageUri"])
            with open(image_path, "wb") as write_image:
                write_image.write(request.content)
            self.filenames.append(image_path)

        print(colored("[+] downloaded {} jpg files".format(len(self.pages)), "green"))

    def convert_files(self, output_file):
        """Bundle the downloaded jpgs into self.outfile as pdf or cbz."""
        if output_file == "pdf":
            print("[*] converting to single pdf...")
            with open(self.outfile, "wb") as out:
                out.write(img2pdf.convert(self.filenames))
            print(colored("[+] output pdf '{}' is ready".format(
                colored(self.outfile, "yellow")), "green"))
        else:
            print("[*] converting to single cbz...")
            # A cbz is just a zip archive renamed to the .cbz extension.
            shutil.make_archive(self.outfile, 'zip', self.temp_dir)
            os.rename(self.outfile + ".zip", self.outfile)
            print(colored("[+] output cbz '{}' is ready".format(
                colored(self.outfile, "yellow")), "green"))

    def remove_tempfiles(self):
        """Delete the downloaded page jpgs and the temporary directory."""
        print("[*] cleaning up temporary files...")
        for file in self.filenames:
            os.remove(file)
        os.rmdir(self.temp_dir)
|
|
|
+
|
|
|
+
|
|
|
main = typer.Typer(add_completion=False)


@main.command(no_args_is_help=True)
def greeting(
        url: str = typer.Argument(..., help='Full issuu url'),
        output_file: OutType = typer.Option(
            'pdf', "--output-type", "-o",
            help='Whether output is a pdf or cbz.')):
    """Drive the full download: validate, fetch, convert, clean up."""
    dl = Downloader()
    dl.check_document_format(url, output_file)
    dl.download_metadata(url)
    dl.download_pages()
    dl.convert_files(output_file)
    dl.remove_tempfiles()


if __name__ == '__main__':
    main()
|