/* * Copyright 2018 Allan Wang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ package com.pitchedapps.frost.debugger import ca.allanwang.kau.logging.KauLoggerExtension import com.pitchedapps.frost.facebook.FB_CSS_URL_MATCHER import com.pitchedapps.frost.facebook.USER_AGENT_BASIC import com.pitchedapps.frost.facebook.get import com.pitchedapps.frost.facebook.requests.call import com.pitchedapps.frost.facebook.requests.zip import com.pitchedapps.frost.utils.createFreshDir import com.pitchedapps.frost.utils.createFreshFile import com.pitchedapps.frost.utils.frostJsoup import com.pitchedapps.frost.utils.unescapeHtml import io.reactivex.disposables.CompositeDisposable import io.reactivex.rxkotlin.addTo import okhttp3.Request import okhttp3.ResponseBody import org.jsoup.Jsoup import org.jsoup.nodes.Document import org.jsoup.nodes.Element import org.jsoup.nodes.Entities import java.io.File import java.io.FileOutputStream import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.atomic.AtomicInteger import java.util.zip.ZipEntry import java.util.zip.ZipOutputStream /** * Created by Allan Wang on 04/01/18. * * Helper to download html files and assets for offline viewing * * Inspired by Save for Offline */ class OfflineWebsite( private val url: String, private val cookie: String = "", baseUrl: String? = null, private val html: String? = null, /** * Directory that holds all the files */ val baseDir: File, private val userAgent: String = USER_AGENT_BASIC ) { /** * Supplied url without the queries */ private val baseUrl = (baseUrl ?: url.substringBefore("?") .substringBefore(".com")).trim('/') private val mainFile = File(baseDir, "index.html") private val assetDir = File(baseDir, "assets") private var cancelled = false private val urlMapper = ConcurrentHashMap() private val atomicInt = AtomicInteger() private val L = KauLoggerExtension("Offline", com.pitchedapps.frost.utils.L) init { if (!this.baseUrl.startsWith("http")) throw IllegalArgumentException("Base Url must start with http") } private val fileQueue = mutableSetOf() private val cssQueue = mutableSetOf() private fun request(url: String) = Request.Builder() .header("Cookie", cookie) .header("User-Agent", userAgent) .url(url) .get() .call() private val compositeDisposable = CompositeDisposable() /** * Caller to bind callbacks and start the load * Callback is guaranteed to be called unless the load is cancelled */ fun load(progress: (Int) -> Unit = {}, callback: (Boolean) -> Unit) { reset() L.v { "Saving $url to ${baseDir.absolutePath}" } if (!baseDir.exists() && !baseDir.mkdirs()) { L.e { "Could not make directory" } return callback(false) } if (!mainFile.createNewFile()) { L.e { "Could not create ${mainFile.absolutePath}" } return callback(false) } if (!assetDir.createFreshDir()) { L.e { "Could not create ${assetDir.absolutePath}" } return callback(false) } progress(10) if (cancelled) return val doc: Document if (html == null || html.length < 100) { doc = frostJsoup(cookie, url) } else { doc = Jsoup.parse("${html.unescapeHtml()}") L.d { "Building data from supplied content of size ${html.length}" } } doc.setBaseUri(baseUrl) doc.outputSettings().escapeMode(Entities.EscapeMode.extended) if (doc.childNodeSize() == 0) { L.e { "No content found" } return callback(false) } if (cancelled) return progress(35) doc.collect("link[href][rel=stylesheet]", "href", cssQueue) doc.collect("link[href]:not([rel=stylesheet])", "href", fileQueue) doc.collect("img[src]", "src", fileQueue) doc.collect("img[data-canonical-src]", "data-canonical-src", fileQueue) doc.collect("script[src]", "src", fileQueue) // make links absolute doc.select("a[href]").forEach { val absLink = it.attr("abs:href") it.attr("href", absLink) } if (cancelled) return mainFile.writeText(doc.html()) progress(50) downloadCss().subscribe { cssLinks, cssThrowable -> if (cssThrowable != null) { L.e { "CSS parsing failed: ${cssThrowable.message} $cssThrowable" } callback(false) return@subscribe } progress(70) fileQueue.addAll(cssLinks) if (cancelled) return@subscribe downloadFiles().subscribe { success, throwable -> L.v { "All files downloaded: $success with throwable $throwable" } progress(100) callback(true) } }.addTo(compositeDisposable) } fun zip(name: String): Boolean { try { val zip = File(baseDir, "$name.zip") if (!zip.createFreshFile()) { L.e { "Failed to create zip at ${zip.absolutePath}" } return false } ZipOutputStream(FileOutputStream(zip)).use { out -> fun File.zip(name: String = this.name) { if (!isFile) return inputStream().use { file -> out.putNextEntry(ZipEntry(name)) file.copyTo(out) } out.closeEntry() delete() } baseDir.listFiles { _, n -> n != "$name.zip" }.forEach { it.zip() } assetDir.listFiles().forEach { it.zip("assets/${it.name}") } } return true } catch (e: Exception) { L.e { "Zip failed: ${e.message}" } return false } } fun loadAndZip(name: String, progress: (Int) -> Unit = {}, callback: (Boolean) -> Unit) { load({ progress((it * 0.85f).toInt()) }) { if (cancelled) return@load if (!it) callback(false) else { val result = zip(name) progress(100) callback(result) } } } private fun downloadFiles() = fileQueue.clean().toTypedArray().zip({ it.all { self -> self } }, { it.downloadUrl({ false }) { file, body -> body.byteStream().use { input -> file.outputStream().use { output -> input.copyTo(output) return@downloadUrl true } } } }) private fun downloadCss() = cssQueue.clean().toTypedArray().zip, Set>({ it.flatMap { l -> l }.toSet() }, { cssUrl -> cssUrl.downloadUrl({ emptySet() }) { file, body -> var content = body.string() val links = FB_CSS_URL_MATCHER.findAll(content).mapNotNull { it[1] } val absLinks = links.mapNotNull { val url = when { it.startsWith("http") -> it it.startsWith("/") -> "$baseUrl$it" else -> return@mapNotNull null } // css files are already in the asset folder, // so the url does not point to another subfolder content = content.replace(it, url.fileName()) url }.toSet() L.v { "Abs links $absLinks" } file.writeText(content) return@downloadUrl absLinks } }) private inline fun String.downloadUrl( fallback: () -> T, action: (file: File, body: ResponseBody) -> T ): T { val file = File(assetDir, fileName()) if (!file.createNewFile()) { L.e { "Could not create path for ${file.absolutePath}" } return fallback() } val body = request(this).execute().body() ?: return fallback() try { body.use { return action(file, it) } } catch (e: Exception) { return fallback() } } private fun Element.collect(query: String, key: String, collector: MutableSet) { val data = select(query) L.v { "Found ${data.size} elements with $query" } data.forEach { val absLink = it.attr("abs:$key") if (!absLink.isValid) return@forEach collector.add(absLink) it.attr(key, "assets/${absLink.fileName()}") } } private inline val String.isValid get() = startsWith("http") /** * Fetch the previously discovered filename * or create a new one * This is thread-safe */ private fun String.fileName(): String { val mapped = urlMapper[this] if (mapped != null) return mapped val candidate = substringBefore("?").trim('/') .substringAfterLast("/").shorten() val index = atomicInt.getAndIncrement() var newUrl = "a${index}_$candidate" /** * This is primarily for zipping up and sending via emails * As .js files typically aren't allowed, we'll simply make everything txt files */ if (newUrl.endsWith(".js")) newUrl = "$newUrl.txt" urlMapper[this] = newUrl return newUrl } private fun String.shorten() = if (length <= 10) this else substring(length - 10) private fun Set.clean(): List = filter(String::isNotBlank).filter { it.startsWith("http") } private fun reset() { cancelled = false urlMapper.clear() atomicInt.set(0) fileQueue.clear() cssQueue.clear() } fun cancel() { cancelled = true compositeDisposable.dispose() L.v { "Request cancelled" } } }