package com.pitchedapps.frost.debugger

import ca.allanwang.kau.logging.KauLoggerExtension
import com.pitchedapps.frost.facebook.FB_CSS_URL_MATCHER
import com.pitchedapps.frost.facebook.USER_AGENT_BASIC
import com.pitchedapps.frost.facebook.get
import com.pitchedapps.frost.facebook.requests.call
import com.pitchedapps.frost.facebook.requests.zip
import com.pitchedapps.frost.utils.createFreshDir
import com.pitchedapps.frost.utils.createFreshFile
import com.pitchedapps.frost.utils.frostJsoup
import com.pitchedapps.frost.utils.unescapeHtml
import io.reactivex.disposables.CompositeDisposable
import io.reactivex.rxkotlin.addTo
import okhttp3.Request
import okhttp3.ResponseBody
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
import org.jsoup.nodes.Entities
import java.io.File
import java.io.FileOutputStream
import java.util.concurrent.ConcurrentHashMap
import java.util.concurrent.atomic.AtomicInteger
import java.util.zip.ZipEntry
import java.util.zip.ZipOutputStream

/**
 * Created by Allan Wang on 04/01/18.
 *
 * Helper to download html files and assets for offline viewing.
 *
 * The page is written to `index.html` inside [baseDir] and every collected
 * asset (stylesheets, images, scripts, other links) is written to an `assets`
 * sub-directory under a generated file name.
 *
 * Inspired by Save for Offline
 *
 * @param url page to save
 * @param cookie value sent as the `Cookie` header of every request
 * @param baseUrl prefix used to resolve relative links; when `null` it is derived from [url]
 * @param html page content to use instead of fetching [url]; ignored when `null` or shorter than 100 characters
 * @param baseDir directory that holds all the files
 * @param userAgent value sent as the `User-Agent` header of every request
 * @throws IllegalArgumentException if the resolved base url does not start with `http`
 */
class OfflineWebsite(
    private val url: String,
    private val cookie: String = "",
    baseUrl: String? = null,
    private val html: String? = null,
    /**
     * Directory that holds all the files
     */
    val baseDir: File,
    private val userAgent: String = USER_AGENT_BASIC
) {

    /**
     * Explicitly supplied base url, or the supplied url cut after its first `.com`
     * (queries removed), without leading/trailing slashes.
     */
    private val baseUrl: String = (baseUrl ?: deriveBaseUrl(url)).trim('/')

    private val mainFile = File(baseDir, "index.html")
    private val assetDir = File(baseDir, "assets")

    // Written by cancel() and read from the download callbacks,
    // so the write has to be visible across threads.
    @Volatile
    private var cancelled = false

    /** Maps an absolute url to the file name generated for it. */
    private val urlMapper = ConcurrentHashMap<String, String>()

    /** Counter used to keep generated file names unique. */
    private val atomicInt = AtomicInteger()

    private val L = KauLoggerExtension("Offline", com.pitchedapps.frost.utils.L)

    init {
        require(this.baseUrl.startsWith("http")) { "Base Url must start with http" }
    }

    /** Absolute urls of non-css assets still to be downloaded. */
    private val fileQueue = mutableSetOf<String>()

    /** Absolute urls of stylesheets still to be downloaded and rewritten. */
    private val cssQueue = mutableSetOf<String>()

    /** Builds a GET call for [url] carrying the configured cookie and user agent. */
    private fun request(url: String) = Request.Builder()
        .header("Cookie", cookie)
        .header("User-Agent", userAgent)
        .url(url)
        .get()
        .call()

    private val compositeDisposable = CompositeDisposable()

    /**
     * Caller to bind callbacks and start the load
     * Callback is guaranteed to be called unless the load is cancelled
     *
     * @param progress receives the values 10, 35, 50, 70 and 100 as the stages complete
     * @param callback receives `false` when the output files cannot be created, the
     * document is empty or the css download fails, and `true` once the file downloads finish
     */
    fun load(progress: (Int) -> Unit = {}, callback: (Boolean) -> Unit) {
        reset()

        L.v { "Saving $url to ${baseDir.absolutePath}" }

        if (!baseDir.exists() && !baseDir.mkdirs()) {
            L.e { "Could not make directory" }
            return callback(false)
        }

        if (!mainFile.createNewFile()) {
            L.e { "Could not create ${mainFile.absolutePath}" }
            return callback(false)
        }

        if (!assetDir.createFreshDir()) {
            L.e { "Could not create ${assetDir.absolutePath}" }
            return callback(false)
        }

        progress(10)

        if (cancelled) return

        val doc: Document
        if (html == null || html.length < 100) {
            doc = frostJsoup(cookie, url)
        } else {
            // NOTE(review): the template below adds nothing around the content;
            // confirm whether wrapping markup was intended here.
            doc = Jsoup.parse("${html.unescapeHtml()}")
            L.d { "Building data from supplied content of size ${html.length}" }
        }
        doc.setBaseUri(baseUrl)
        doc.outputSettings().escapeMode(Entities.EscapeMode.extended)

        if (doc.childNodeSize() == 0) {
            L.e { "No content found" }
            return callback(false)
        }

        if (cancelled) return

        progress(35)

        // Queue every asset and point the document at its local copy
        doc.collect("link[href][rel=stylesheet]", "href", cssQueue)
        doc.collect("link[href]:not([rel=stylesheet])", "href", fileQueue)
        doc.collect("img[src]", "src", fileQueue)
        doc.collect("img[data-canonical-src]", "data-canonical-src", fileQueue)
        doc.collect("script[src]", "src", fileQueue)

        // make links absolute
        doc.select("a[href]").forEach { anchor ->
            anchor.attr("href", anchor.attr("abs:href"))
        }

        if (cancelled) return

        mainFile.writeText(doc.html())

        progress(50)

        downloadCss().subscribe { cssLinks, cssThrowable ->
            if (cssThrowable != null) {
                L.e { "CSS parsing failed: ${cssThrowable.message} $cssThrowable" }
                callback(false)
                return@subscribe
            }

            progress(70)

            // Assets referenced from within the stylesheets are plain files
            fileQueue.addAll(cssLinks)

            if (cancelled) return@subscribe

            // Tracked as well, so that cancel() also stops the file downloads
            downloadFiles().subscribe { success, throwable ->
                L.v { "All files downloaded: $success with throwable $throwable" }
                progress(100)
                callback(true)
            }.addTo(compositeDisposable)
        }.addTo(compositeDisposable)
    }

    /**
     * Moves every regular file in [baseDir] and in the assets directory into
     * `<name>.zip` inside [baseDir]; each file is deleted once it has been added.
     * A directory that cannot be listed is skipped.
     *
     * @return `true` if the archive was written, `false` if it could not be
     * created or an exception occurred
     */
    fun zip(name: String): Boolean {
        return try {
            val zipFile = File(baseDir, "$name.zip")
            if (!zipFile.createFreshFile()) {
                L.e { "Failed to create zip at ${zipFile.absolutePath}" }
                return false
            }

            ZipOutputStream(FileOutputStream(zipFile)).use { out ->

                fun File.addToZip(entryName: String = this.name) {
                    if (!isFile) return
                    inputStream().use { input ->
                        out.putNextEntry(ZipEntry(entryName))
                        input.copyTo(out)
                    }
                    out.closeEntry()
                    delete()
                }

                // listFiles returns null when the directory cannot be listed.
                // The archive itself is excluded from its own content.
                baseDir.listFiles { _, n -> n != "$name.zip" }?.forEach { it.addToZip() }
                assetDir.listFiles()?.forEach { it.addToZip("assets/${it.name}") }
            }
            true
        } catch (e: Exception) {
            L.e { "Zip failed: ${e.message}" }
            false
        }
    }

    /**
     * Runs [load] followed by [zip].
     * Load progress is scaled to 85%, and 100 is reported once zipping is done.
     * [callback] receives `false` if the load failed, the zip result otherwise,
     * and is not called when the load has been cancelled.
     */
    fun loadAndZip(name: String, progress: (Int) -> Unit = {}, callback: (Boolean) -> Unit) {
        load({ progress((it * 0.85f).toInt()) }) { loaded ->
            if (cancelled) return@load
            if (!loaded) {
                callback(false)
            } else {
                val result = zip(name)
                progress(100)
                callback(result)
            }
        }
    }

    /**
     * Downloads every queued file into the assets directory.
     * The combined result is `true` only if every single download succeeded.
     */
    private fun downloadFiles() = fileQueue.clean().toTypedArray().zip({ results ->
        results.all { it }
    }, { fileUrl ->
        fileUrl.downloadUrl({ false }) { file, body ->
            body.byteStream().use { input ->
                file.outputStream().use { output -> input.copyTo(output) }
            }
            true
        }
    })

    /**
     * Downloads every queued stylesheet, replaces the urls matched by
     * [FB_CSS_URL_MATCHER] with their generated file names and saves the result.
     * The combined result is the set of absolute urls referenced by the stylesheets.
     */
    private fun downloadCss() = cssQueue.clean().toTypedArray().zip<String, Set<String>, Set<String>>({ results ->
        results.flatMap { it }.toSet()
    }, { cssUrl ->
        cssUrl.downloadUrl({ emptySet() }) { file, body ->
            var content = body.string()
            val links = FB_CSS_URL_MATCHER.findAll(content).mapNotNull { it[1] }
            val absLinks = links.mapNotNull { link ->
                val absUrl = when {
                    link.startsWith("http") -> link
                    link.startsWith("/") -> "$baseUrl$link"
                    else -> return@mapNotNull null
                }
                // css files are already in the asset folder,
                // so the url does not point to another subfolder
                content = content.replace(link, absUrl.fileName())
                absUrl
            }.toSet()

            L.v { "Abs links $absLinks" }

            file.writeText(content)
            absLinks
        }
    })

    /**
     * Requests the receiver url and hands the response body, together with the
     * asset file reserved for it, to [action].
     *
     * [fallback] supplies the result when the asset file cannot be created,
     * the response has no body, or the request or [action] throws.
     */
    private inline fun <T> String.downloadUrl(
        fallback: () -> T,
        action: (file: File, body: ResponseBody) -> T
    ): T {
        val target = this

        val file = File(assetDir, fileName())
        if (!file.createNewFile()) {
            L.e { "Could not create path for ${file.absolutePath}" }
            return fallback()
        }

        try {
            // Executed inside the try so that a failed request
            // falls back as well instead of escaping
            val body = request(target).execute().body() ?: return fallback()
            body.use { return action(file, it) }
        } catch (e: Exception) {
            L.e { "Download failed for $target: ${e.message}" }
            return fallback()
        }
    }

    /**
     * Adds the absolute value of attribute [key], for every element matching [query],
     * to [collector] and rewrites the attribute to the matching path in the assets folder.
     * Values that do not resolve to an http(s) url are left untouched.
     */
    private fun Element.collect(query: String, key: String, collector: MutableSet<String>) {
        val data = select(query)
        L.v { "Found ${data.size} elements with $query" }
        data.forEach { element ->
            val absLink = element.attr("abs:$key")
            if (!absLink.isValid) return@forEach
            collector.add(absLink)
            element.attr(key, "assets/${absLink.fileName()}")
        }
    }

    private inline val String.isValid
        get() = startsWith("http")

    /**
     * Fetch the previously discovered filename
     * or create a new one
     *
     * The name is `a<counter>_` followed by at most the last 10 characters of
     * the final path segment (queries removed).
     *
     * This is thread-safe: the mapping is only stored if absent, so concurrent
     * callers always receive the same name for the same url.
     * A counter value may remain unused when two threads race on one url.
     */
    private fun String.fileName(): String = urlMapper.getOrPut(this) {
        val candidate = substringBefore("?").trim('/')
            .substringAfterLast("/").shorten()

        val index = atomicInt.getAndIncrement()

        val newUrl = "a${index}_$candidate"

        /**
         * This is primarily for zipping up and sending via emails
         * As .js files typically aren't allowed, we'll simply make everything txt files
         */
        if (newUrl.endsWith(".js")) "$newUrl.txt" else newUrl
    }

    /** Keeps at most the last 10 characters of the receiver. */
    private fun String.shorten() =
        if (length <= 10) this else substring(length - 10)

    /** Drops blank entries and entries that are not http(s) urls. */
    private fun Set<String>.clean(): List<String> =
        filter { it.isNotBlank() && it.startsWith("http") }

    /** Restores the initial state so that a new load can start. */
    private fun reset() {
        cancelled = false
        urlMapper.clear()
        atomicInt.set(0)
        fileQueue.clear()
        cssQueue.clear()
    }

    /**
     * Stops the current load; no further callback is delivered for it.
     */
    fun cancel() {
        cancelled = true
        // clear() rather than dispose(): a disposed container would immediately
        // dispose the subscriptions of any later load
        compositeDisposable.clear()
        L.v { "Request cancelled" }
    }

    private companion object {
        private const val COM_TLD = ".com"

        /**
         * Removes the queries from [url] and cuts it right after its first `.com`.
         * Urls without `.com` are returned without their queries only.
         */
        fun deriveBaseUrl(url: String): String {
            val withoutQuery = url.substringBefore("?")
            val tldStart = withoutQuery.indexOf(COM_TLD)
            return if (tldStart == -1) withoutQuery
            else withoutQuery.substring(0, tldStart + COM_TLD.length)
        }
    }
}