From 8aece5e3f9209d7c161410c304655f0aec2d6054 Mon Sep 17 00:00:00 2001 From: Allan Wang Date: Sun, 7 Jan 2018 02:43:57 -0500 Subject: Feature/website debug (#603) * Create beginning of web downloader * Clean up * Update KAU for reified activity launching * Update web attachments and setFrostColor * Test other zipper * Test simpler image saving model * Finish up image activity * Restore aggressive overlays * Try new zipper * Test again * Fix tests * Add working build * Rename * Support cancellation --- .../pitchedapps/frost/debugger/OfflineWebsite.kt | 307 +++++++++++++++++++++ 1 file changed, 307 insertions(+) create mode 100644 app/src/main/kotlin/com/pitchedapps/frost/debugger/OfflineWebsite.kt (limited to 'app/src/main/kotlin/com/pitchedapps/frost/debugger/OfflineWebsite.kt') diff --git a/app/src/main/kotlin/com/pitchedapps/frost/debugger/OfflineWebsite.kt b/app/src/main/kotlin/com/pitchedapps/frost/debugger/OfflineWebsite.kt new file mode 100644 index 00000000..434f1bae --- /dev/null +++ b/app/src/main/kotlin/com/pitchedapps/frost/debugger/OfflineWebsite.kt @@ -0,0 +1,307 @@ +package com.pitchedapps.frost.debugger + +import ca.allanwang.kau.logging.KauLoggerExtension +import com.pitchedapps.frost.facebook.FB_CSS_URL_MATCHER +import com.pitchedapps.frost.facebook.USER_AGENT_BASIC +import com.pitchedapps.frost.facebook.get +import com.pitchedapps.frost.facebook.requests.call +import com.pitchedapps.frost.facebook.requests.zip +import com.pitchedapps.frost.utils.frostJsoup +import okhttp3.Request +import okhttp3.ResponseBody +import org.jsoup.nodes.Element +import org.jsoup.nodes.Entities +import java.io.File +import java.io.FileOutputStream +import java.util.concurrent.ConcurrentHashMap +import java.util.concurrent.atomic.AtomicInteger +import java.util.zip.ZipEntry +import java.util.zip.ZipOutputStream + +/** + * Created by Allan Wang on 04/01/18. + * + * Helper to download html files and assets for offline viewing + * + * Inspired by Save for Offline + */ +class OfflineWebsite(private val url: String, + private val cookie: String = "", + /** + * Directory that holds all the files + */ + val baseDir: File, + private val userAgent: String = USER_AGENT_BASIC) { + + /** + * Supplied url without the queries + */ + val baseUrl = url.substringBefore("?").trim('/') + + private val mainFile = File(baseDir, "index.html") + private val assetDir = File(baseDir, "assets") + + private var cancelled = false + private val urlMapper = ConcurrentHashMap() + private val atomicInt = AtomicInteger() + + private val L = KauLoggerExtension("Offline", com.pitchedapps.frost.utils.L) + + init { + if (!baseUrl.startsWith("http")) + throw IllegalArgumentException("Base Url must start with http") + } + + private val fileQueue = mutableSetOf() + + private val cssQueue = mutableSetOf() + + private fun request(url: String) = Request.Builder() + .header("Cookie", cookie) + .header("User-Agent", userAgent) + .url(url) + .get() + .call() + + /** + * Caller to bind callbacks and start the load + * Callback is guaranteed to be called unless the load is cancelled + */ + fun load(progress: (Int) -> Unit = {}, callback: (Boolean) -> Unit) { + reset() + + L.v { "Saving $url to ${baseDir.absolutePath}" } + if (baseDir.exists() && !baseDir.deleteRecursively()) { + L.e { "Could not clean directory" } + return callback(false) + } + + if (!baseDir.mkdirs()) { + L.e { "Could not make directory" } + return callback(false) + } + + + if (!mainFile.createNewFile()) { + L.e { "Could not create ${mainFile.absolutePath}" } + return callback(false) + } + + + if (!assetDir.mkdirs()) { + L.e { "Could not create ${assetDir.absolutePath}" } + return callback(false) + } + + progress(10) + + if (cancelled) return + + val doc = frostJsoup(cookie, url) + doc.setBaseUri(baseUrl) + doc.outputSettings().escapeMode(Entities.EscapeMode.extended) + if (doc.childNodeSize() == 0) { + L.e { "No content found" } + return callback(false) + } + + if (cancelled) return + + progress(35) + + doc.collect("link[href][rel=stylesheet]", "href", cssQueue) + doc.collect("link[href]:not([rel=stylesheet])", "href", fileQueue) + doc.collect("img[src]", "src", fileQueue) + doc.collect("img[data-canonical-src]", "data-canonical-src", fileQueue) + doc.collect("script[src]", "src", fileQueue) + + // make links absolute + doc.select("a[href]").forEach { + val absLink = it.attr("abs:href") + it.attr("href", absLink) + } + + if (cancelled) return + + mainFile.writeText(doc.html()) + + progress(50) + + downloadCss().subscribe { cssLinks, cssThrowable -> + if (cssThrowable != null) { + L.e { "CSS parsing failed" } + } + + progress(70) + + fileQueue.addAll(cssLinks) + + if (cancelled) return@subscribe + + downloadFiles().subscribe { success, throwable -> + L.v { "All files downloaded: $success with throwable $throwable" } + progress(100) + callback(true) + } + } + } + + fun zip(name: String): Boolean { + try { + val zip = File(baseDir, "$name.zip") + if (zip.exists() && (!zip.delete() || !zip.createNewFile())) { + L.e { "Failed to create zip at ${zip.absolutePath}" } + return false + } + + ZipOutputStream(FileOutputStream(zip)).use { out -> + + fun File.zip(name: String = this.name) { + inputStream().use { file -> + out.putNextEntry(ZipEntry(name)) + file.copyTo(out) + } + out.closeEntry() + delete() + } + + mainFile.zip() + assetDir.listFiles().forEach { + it.zip("assets/${it.name}") + } + } + return true + } catch (e: Exception) { + return false + } + } + + fun loadAndZip(name: String, progress: (Int) -> Unit = {}, callback: (Boolean) -> Unit) { + + load({ progress((it * 0.85f).toInt()) }) { + if (cancelled) return@load + if (!it) callback(false) + else { + val result = zip(name) + progress(100) + callback(result) + } + } + } + + private fun downloadFiles() = fileQueue.clean().toTypedArray().zip({ + it.all { it } + }, { + it.downloadUrl({ false }) { file, body -> + body.byteStream().use { input -> + file.outputStream().use { output -> + input.copyTo(output) + return@downloadUrl true + } + } + } + }) + + private fun downloadCss() = cssQueue.clean().toTypedArray().zip, Set>({ + it.flatMap { it }.toSet() + }, { + it.downloadUrl({ emptySet() }) { file, body -> + var content = body.string() + val links = FB_CSS_URL_MATCHER.findAll(content).mapNotNull { it[1] } + val absLinks = links.mapNotNull { + val url = when { + it.startsWith("http") -> it + it.startsWith("/") -> "$baseUrl$it" + else -> return@mapNotNull null + } + // css files are already in the asset folder, + // so the url does not point to another subfolder + content = content.replace(it, url.fileName()) + url + }.toSet() + + L.v { "Abs links $absLinks" } + + file.writeText(content) + return@downloadUrl absLinks + } + }) + + private inline fun String.downloadUrl(fallback: () -> T, + action: (file: File, body: ResponseBody) -> T): T { + + val file = File(assetDir, fileName()) + if (!file.createNewFile()) { + L.e { "Could not create path for ${file.absolutePath}" } + return fallback() + } + + val body = request(this).execute().body() ?: return fallback() + + try { + body.use { + return action(file, it) + } + } catch (e: Exception) { + return fallback() + } + } + + private fun Element.collect(query: String, key: String, collector: MutableSet) { + val data = select(query) + L.v { "Found ${data.size} elements with $query" } + data.forEach { + val absLink = it.attr("abs:$key") + if (!absLink.isValid) return@forEach + collector.add(absLink) + it.attr(key, "assets/${absLink.fileName()}") + } + } + + private inline val String.isValid + get() = startsWith("http") + + /** + * Fetch the previously discovered filename + * or create a new one + * This is thread-safe + */ + private fun String.fileName(): String { + val mapped = urlMapper[this] + if (mapped != null) return mapped + + val candidate = substringBefore("?").trim('/') + .substringAfterLast("/").shorten() + + val index = atomicInt.getAndIncrement() + + /** + * This is primarily for zipping up and sending via emails + * As .js files typically aren't allowed, we'll simply make everything txt files + */ + val newUrl = "a${index}_$candidate.txt" + urlMapper.put(this, newUrl) + return newUrl + } + + private fun String.shorten() = + if (length <= 10) this else substring(length - 10) + + private fun Set.clean() + = filter(String::isNotBlank).filter { it.startsWith("http") } + + private fun reset() { + cancelled = false + urlMapper.clear() + atomicInt.set(0) + fileQueue.clear() + cssQueue.clear() + baseDir.deleteRecursively() + } + + fun cancel() { + cancelled = true + L.v { "Request cancelled" } + } + +} \ No newline at end of file -- cgit v1.2.3