#!/usr/bin/env python3
#
# noah / issuu-dl
#
# issuu-dl v0.5
#
# download documents - as PDFs or CBZs - from issuu.com
#
# tested (and working) on 05 Apr 2022

# arch-deps: python-tqdm, python-termcolor, img2pdf, typer

import os
import sys
import shutil
import json
from enum import Enum

import typer
import requests
import img2pdf
from termcolor import colored
from tqdm import tqdm


class OutType(str, Enum):
    """Output formats that can be selected with the --output-type option.

    The members are also plain strings, so they compare equal to "pdf" and
    "cbz" and can be concatenated into file names.
    """

    # All pages are merged into one PDF document.
    PDF = "pdf"
    # All pages are packed into one zip archive with a .cbz extension.
    CBZ = "cbz"


def error_msg(msg: str):
    """Print *msg* as a red "[*] Error: ..." line and exit with status 1."""
    line = f"[*] Error: {msg}"
    print(colored(line, "red"))
    # Equivalent to sys.exit(1): unwinds the interpreter with exit status 1.
    raise SystemExit(1)


class Downloader:
    """Fetch the page images of an issuu.com document and bundle them.

    The methods are meant to be called in this order:
    check_document_format, download_metadata, download_pages,
    convert_files, remove_tempfiles.  Fatal problems are reported through
    error_msg, which terminates the program.
    """

    # Per-request timeout in seconds so a stalled server cannot hang the tool.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        # Paths of the downloaded page images, in page order.
        self.filenames = []
        # Directory holding the page images; "" while none exists.
        self.temp_dir = ""
        # "<account>/<document>" identifier derived from the URL.
        self.target_doc = ""
        # Final output file name and the same name without its extension.
        self.outfile = ""
        self.outfile_base = ""
        # Page descriptors read from reader3_4.json.
        self.pages = []
        # One session shared by every HTTP request this object makes.
        self.request_session = requests.Session()

    def _get(self, url):
        """Return the response for *url*, exiting on any failure.

        Network errors and non-200 status codes are reported through
        error_msg, so a returned response always has status 200.
        """
        try:
            response = self.request_session.get(
                url, timeout=self.REQUEST_TIMEOUT
            )
        except requests.exceptions.TooManyRedirects:
            error_msg("too many redirect requests made")
        except requests.exceptions.RequestException as exc:
            error_msg("request for '{}' failed: {}".format(url, exc))
        if response.status_code != 200:
            error_msg(
                "http status code received: {}".format(response.status_code)
            )
        return response

    def check_document_format(self, url: str, output_file: str) -> None:
        """Derive the document identifier and output file name from *url*.

        The identifier is built from the 4th and 6th "/"-separated pieces
        of the URL (presumably https://issuu.com/<account>/docs/<document>
        -- confirm against current issuu links).

        Args:
            url: Full document URL.
            output_file: Output extension, "pdf" or "cbz".
        """
        pieces = url.split("/")
        try:
            account = pieces[3]
            # A query string or fragment is not part of the document name.
            document = pieces[5].split("?", 1)[0].split("#", 1)[0]
        except IndexError:
            error_msg("wrong URL format, please recheck your link")
        if not account or not document:
            error_msg("wrong URL format, please recheck your link")
        self.target_doc = account + "/" + document
        print(
            "[*] target_doc is '{}'".format(colored(self.target_doc, "yellow"))
        )
        self.outfile_base = self.target_doc.replace("/", "_")
        self.outfile = self.outfile_base + "." + output_file

    def download_metadata(self, url: str) -> None:
        """Check the document page and load its page list into self.pages.

        Exits through error_msg when the page cannot be fetched, lacks the
        reader3 marker, or when reader3_4.json is missing or malformed.
        """
        print("[*] opening page...")
        response = self._get(url)
        print(colored("[+] url confirmed", "green"))
        # Explicit check instead of assert, which is stripped under -O.
        if "issuu-reader3-embed-files" not in response.content.decode():
            error_msg("page does not look like a reader3 issuu document")
        print(colored("[+] reader3 support confirmed", "green"))

        print("[*] downloading reader3_4.json...")

        response = self._get(
            "https://reader3.isu.pub/{}/reader3_4.json".format(self.target_doc)
        )
        try:
            document = json.loads(response.content.decode())["document"]
            publication_id = document["publicationId"]
            revision_id = document["revisionId"]
            pages = document["pages"]
        except (ValueError, KeyError, TypeError):
            error_msg("unexpected content in reader3_4.json")
        self.pages = pages

        print(
            colored("[+] fetched document data: ", "green")
            + colored(
                "publicationId is {}, revisionId is {}".format(
                    publication_id, revision_id
                ),
                "yellow",
            )
        )
        print(colored("[+] found {} pages".format(len(self.pages)), "green"))

    def download_pages(self) -> None:
        """Download every page image into a fresh temporary directory.

        The directory is named "<pid>-<outfile_base>" and created in the
        current working directory; image paths are collected in
        self.filenames in page order.
        """
        print("[*] downloading pages...")

        self.filenames = []
        temp_dir = str(os.getpid()) + "-" + self.outfile_base

        try:
            os.mkdir(temp_dir)
        except OSError:
            error_msg(
                "could not create temporary directory '{}'".format(temp_dir)
            )
        # Record the directory only once it is known to be ours, so cleanup
        # can never touch a pre-existing directory of the same name.
        self.temp_dir = temp_dir

        for page in tqdm(self.pages):
            image_uri = page["imageUri"]
            uri_parts = image_uri.split("/")
            # The file name joins the 2nd and 4th pieces of the image URI.
            image_path = os.path.join(
                self.temp_dir, uri_parts[1] + "-" + uri_parts[3]
            )
            response = self._get("https://" + image_uri)
            with open(image_path, "wb") as image_file:
                image_file.write(response.content)
            self.filenames.append(image_path)

        print(
            colored(
                "[+] downloaded {} jpg files".format(len(self.pages)), "green"
            )
        )

    def convert_files(self, output_file: str) -> None:
        """Bundle the downloaded images into self.outfile.

        Args:
            output_file: "pdf" builds one PDF with img2pdf; any other value
                zips the temporary directory and names the archive .cbz.
        """
        if output_file == "pdf":
            print("[*] converting to single pdf...")
            # Convert first so a failed conversion leaves no empty output.
            pdf_bytes = img2pdf.convert(self.filenames)
            with open(self.outfile, "wb") as out:
                out.write(pdf_bytes)
            print(
                colored(
                    "[+] output pdf '{}' is ready".format(
                        colored(self.outfile, "yellow")
                    ),
                    "green",
                )
            )
        else:
            print("[*] converting to single cbz...")
            # make_archive appends ".zip"; move the result to the final name.
            shutil.make_archive(self.outfile, "zip", self.temp_dir)
            # os.replace overwrites an existing target on every platform.
            os.replace(self.outfile + ".zip", self.outfile)
            print(
                colored(
                    "[+] output cbz '{}' is ready".format(
                        colored(self.outfile, "yellow")
                    ),
                    "green",
                )
            )

    def remove_tempfiles(self) -> None:
        """Delete the temporary directory and everything inside it.

        Does nothing when no temporary directory is recorded, so it is safe
        to call more than once.
        """
        print("[*] cleaning up temporary files...")
        if not self.temp_dir:
            return
        # Remove the whole tree: per-file deletion breaks on duplicate
        # entries in self.filenames or on files that were never listed.
        shutil.rmtree(self.temp_dir)
        self.filenames = []
        self.temp_dir = ""


# Typer application object; it is called as the script entry point and the
# download command is registered on it. Completion options are turned off.
main = typer.Typer(add_completion=False)


@main.command(no_args_is_help=True)
def greeting(
    url: str = typer.Argument(..., help="Full issuu url"),
    output_file: OutType = typer.Option(
        "pdf", "--output-type", "-o", help="Whether output is a pdf or cbz."
    ),
):
    """Download an issuu document and save it as a single pdf or cbz file."""
    download = Downloader()
    download.check_document_format(url, output_file)
    download.download_metadata(url)
    download.download_pages()
    # The temporary directory exists once download_pages has returned, so
    # remove it even when the conversion step fails.
    try:
        download.convert_files(output_file)
    finally:
        download.remove_tempfiles()


# Start the command-line application only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()