package com.pitchedapps.frost.debugger

import ca.allanwang.kau.logging.KauLoggerExtension
import com.pitchedapps.frost.facebook.FB_CSS_URL_MATCHER
import com.pitchedapps.frost.facebook.USER_AGENT_BASIC
import com.pitchedapps.frost.facebook.get
import com.pitchedapps.frost.facebook.requests.call
import com.pitchedapps.frost.facebook.requests.zip
import com.pitchedapps.frost.utils.frostJsoup
import okhttp3.Request
import okhttp3.ResponseBody
import org.jsoup.nodes.Element
import org.jsoup.nodes.Entities
import java.io.File
import java.io.FileOutputStream
import java.util.concurrent.ConcurrentHashMap
import java.util.concurrent.atomic.AtomicInteger
import java.util.zip.ZipEntry
import java.util.zip.ZipOutputStream

/**
 * Created by Allan Wang on 04/01/18.
 *
 * Helper to download html files and assets for offline viewing
 *
 * The page is written to `index.html` inside [baseDir] and every collected
 * asset is written to the `assets` sub directory under a generated name.
 *
 * Inspired by Save for Offline
 */
class OfflineWebsite(private val url: String,
                     private val cookie: String = "",
                     /**
                      * Directory that holds all the files
                      */
                     val baseDir: File,
                     private val userAgent: String = USER_AGENT_BASIC) {

    /**
     * Supplied url without the queries
     */
    val baseUrl = url.substringBefore("?").trim('/')

    private val mainFile = File(baseDir, "index.html")
    private val assetDir = File(baseDir, "assets")

    /**
     * Set by [cancel] and polled by [load], including from the download
     * callbacks, hence volatile so that the update is visible across threads.
     */
    @Volatile
    private var cancelled = false

    /** Maps each absolute asset url to the file name generated for it */
    private val urlMapper = ConcurrentHashMap<String, String>()

    /** Source of the numeric prefix used in generated file names */
    private val atomicInt = AtomicInteger()

    private val L = KauLoggerExtension("Offline", com.pitchedapps.frost.utils.L)

    init {
        require(baseUrl.startsWith("http")) { "Base Url must start with http" }
    }

    /** Absolute urls of non-css assets that still have to be downloaded */
    private val fileQueue = mutableSetOf<String>()

    /** Absolute urls of stylesheets; these are parsed for further asset urls */
    private val cssQueue = mutableSetOf<String>()

    /**
     * Builds a GET call for [url] carrying the configured cookie and user agent
     */
    private fun request(url: String) = Request.Builder()
            .header("Cookie", cookie)
            .header("User-Agent", userAgent)
            .url(url)
            .get()
            .call()

    /**
     * Caller to bind callbacks and start the load
     * Callback is guaranteed to be called unless the load is cancelled
     *
     * NOTE(review): an exception thrown while fetching the document is not
     * caught here, in which case [callback] is not invoked - confirm whether
     * frostJsoup can throw.
     *
     * @param progress receives coarse progress values between 10 and 100
     * @param callback receives false if the directories, files or document
     * could not be prepared, and true once the downloads have completed
     */
    fun load(progress: (Int) -> Unit = {}, callback: (Boolean) -> Unit) {
        reset()

        L.v { "Saving $url to ${baseDir.absolutePath}" }

        if (baseDir.exists() && !baseDir.deleteRecursively()) {
            L.e { "Could not clean directory" }
            return callback(false)
        }

        if (!baseDir.mkdirs()) {
            L.e { "Could not make directory" }
            return callback(false)
        }

        if (!mainFile.createNewFile()) {
            L.e { "Could not create ${mainFile.absolutePath}" }
            return callback(false)
        }

        if (!assetDir.mkdirs()) {
            L.e { "Could not create ${assetDir.absolutePath}" }
            return callback(false)
        }

        progress(10)

        if (cancelled) return

        val doc = frostJsoup(cookie, url)
        doc.setBaseUri(baseUrl)
        doc.outputSettings().escapeMode(Entities.EscapeMode.extended)
        if (doc.childNodeSize() == 0) {
            L.e { "No content found" }
            return callback(false)
        }

        if (cancelled) return

        progress(35)

        // queue every asset and point the element at its local copy
        doc.collect("link[href][rel=stylesheet]", "href", cssQueue)
        doc.collect("link[href]:not([rel=stylesheet])", "href", fileQueue)
        doc.collect("img[src]", "src", fileQueue)
        doc.collect("img[data-canonical-src]", "data-canonical-src", fileQueue)
        doc.collect("script[src]", "src", fileQueue)

        // make links absolute
        doc.select("a[href]").forEach {
            val absLink = it.attr("abs:href")
            it.attr("href", absLink)
        }

        if (cancelled) return

        mainFile.writeText(doc.html())

        progress(50)

        downloadCss().subscribe { cssLinks, cssThrowable ->

            if (cssThrowable != null) {
                L.e { "CSS parsing failed" }
            }

            progress(70)

            // assets referenced from within the stylesheets
            fileQueue.addAll(cssLinks)

            if (cancelled) return@subscribe

            downloadFiles().subscribe { success, throwable ->
                L.v { "All files downloaded: $success with throwable $throwable" }
                progress(100)
                // individual asset failures are only logged; the page itself was saved
                callback(true)
            }
        }
    }

    /**
     * Moves the main file and all downloaded assets into `$name.zip` inside [baseDir].
     * Each file is deleted once it has been added to the archive.
     *
     * @return true if the archive was written, false on any failure
     */
    fun zip(name: String): Boolean {
        try {
            val zip = File(baseDir, "$name.zip")
            if (zip.exists() && (!zip.delete() || !zip.createNewFile())) {
                L.e { "Failed to create zip at ${zip.absolutePath}" }
                return false
            }

            // listFiles returns null when the directory cannot be read;
            // bail out before touching any file
            val assets = assetDir.listFiles()
            if (assets == null) {
                L.e { "Could not list ${assetDir.absolutePath}" }
                return false
            }

            ZipOutputStream(FileOutputStream(zip)).use { out ->

                fun File.zip(name: String = this.name) {
                    inputStream().use { file ->
                        out.putNextEntry(ZipEntry(name))
                        file.copyTo(out)
                    }
                    out.closeEntry()
                    delete()
                }

                mainFile.zip()
                assets.forEach { it.zip("assets/${it.name}") }
            }
            return true
        } catch (e: Exception) {
            L.e { "Zip failed: ${e.message}" }
            return false
        }
    }

    /**
     * Runs [load] followed by [zip].
     * Loading accounts for the first 85% of the reported progress.
     * As with [load], the callback is not invoked if the request is cancelled.
     */
    fun loadAndZip(name: String, progress: (Int) -> Unit = {}, callback: (Boolean) -> Unit) {
        load({ progress((it * 0.85f).toInt()) }) {
            if (cancelled) return@load
            if (!it) callback(false)
            else {
                val result = zip(name)
                progress(100)
                callback(result)
            }
        }
    }

    /**
     * Downloads every queued file into the asset directory.
     * The combined result is true only if every download succeeded.
     */
    private fun downloadFiles() = fileQueue.clean().toTypedArray().zip<String, Boolean, Boolean>({
        it.all { it }
    }, {
        it.downloadUrl({ false }) { file, body ->
            body.byteStream().use { input ->
                file.outputStream().use { output ->
                    input.copyTo(output)
                    return@downloadUrl true
                }
            }
        }
    })

    /**
     * Downloads every queued stylesheet, rewrites the urls found inside it
     * to the local file names, and emits the set of absolute urls it referenced.
     */
    private fun downloadCss() = cssQueue.clean().toTypedArray().zip<String, Set<String>, Set<String>>({
        it.flatMap { it }.toSet()
    }, {
        it.downloadUrl({ emptySet() }) { file, body ->
            var content = body.string()
            val links = FB_CSS_URL_MATCHER.findAll(content).mapNotNull { it[1] }
            val absLinks = links.mapNotNull {
                val url = when {
                    it.startsWith("http") -> it
                    it.startsWith("/") -> "$baseUrl$it"
                    else -> return@mapNotNull null
                }
                // css files are already in the asset folder,
                // so the url does not point to another subfolder
                content = content.replace(it, url.fileName())
                url
            }.toSet()

            L.v { "Abs links $absLinks" }

            file.writeText(content)
            return@downloadUrl absLinks
        }
    })

    /**
     * Requests the receiver url and hands the response body, along with the
     * asset file reserved for it, to [action].
     *
     * [fallback] supplies the result if the file cannot be created, if there
     * is no body, or if anything throws, including the request itself.
     * The body is always closed.
     */
    private inline fun <T> String.downloadUrl(fallback: () -> T,
                                              action: (file: File, body: ResponseBody) -> T): T {
        val source = this
        return try {
            val file = File(assetDir, fileName())
            if (!file.createNewFile()) {
                L.e { "Could not create path for ${file.absolutePath}" }
                return fallback()
            }

            val body = request(source).execute().body() ?: return fallback()

            body.use { action(file, it) }
        } catch (e: Exception) {
            L.e { "Download failed for $source: ${e.message}" }
            fallback()
        }
    }

    /**
     * Finds all elements matching [query], adds the absolute value of their
     * [key] attribute to [collector], and rewrites the attribute to the
     * matching path in the asset directory.
     * Links that do not resolve to an http(s) url are left untouched.
     */
    private fun Element.collect(query: String, key: String, collector: MutableSet<String>) {
        val data = select(query)
        L.v { "Found ${data.size} elements with $query" }
        data.forEach {
            val absLink = it.attr("abs:$key")
            if (!absLink.isValid) return@forEach
            collector.add(absLink)
            it.attr(key, "assets/${absLink.fileName()}")
        }
    }

    private inline val String.isValid
        get() = startsWith("http")

    /**
     * Fetch the previously discovered filename
     * or create a new one
     *
     * This is thread-safe: the mapping is published with putIfAbsent semantics,
     * so every caller receives the same name for a given url.
     * If two threads race on a new url, one index is consumed without being used.
     */
    private fun String.fileName(): String = urlMapper.getOrPut(this) {
        val candidate = substringBefore("?").trim('/')
                .substringAfterLast("/").shorten()

        val index = atomicInt.getAndIncrement()

        /**
         * This is primarily for zipping up and sending via emails
         * As .js files typically aren't allowed, we'll simply make everything txt files
         */
        "a${index}_$candidate.txt"
    }

    /** Keeps at most the last 10 characters */
    private fun String.shorten() =
            if (length <= 10) this else substring(length - 10)

    /** Drops blank entries and anything that is not an http(s) url */
    private fun Set<String>.clean() =
            filter(String::isNotBlank).filter { it.startsWith("http") }

    /**
     * Clears all state from a previous load, including the output directory
     */
    private fun reset() {
        cancelled = false
        urlMapper.clear()
        atomicInt.set(0)
        fileQueue.clear()
        cssQueue.clear()
        baseDir.deleteRecursively()
    }

    /**
     * Flags the current load as cancelled.
     * The load stops at its next checkpoint and its callback is not invoked.
     */
    fun cancel() {
        cancelled = true
        L.v { "Request cancelled" }
    }
}