aboutsummaryrefslogtreecommitdiff
path: root/app/src/main/kotlin/com/pitchedapps/frost/utils/JsoupCleaner.kt
diff options
context:
space:
mode:
Diffstat (limited to 'app/src/main/kotlin/com/pitchedapps/frost/utils/JsoupCleaner.kt')
-rw-r--r--app/src/main/kotlin/com/pitchedapps/frost/utils/JsoupCleaner.kt34
1 files changed, 34 insertions, 0 deletions
diff --git a/app/src/main/kotlin/com/pitchedapps/frost/utils/JsoupCleaner.kt b/app/src/main/kotlin/com/pitchedapps/frost/utils/JsoupCleaner.kt
new file mode 100644
index 00000000..da8672f4
--- /dev/null
+++ b/app/src/main/kotlin/com/pitchedapps/frost/utils/JsoupCleaner.kt
@@ -0,0 +1,34 @@
+package com.pitchedapps.frost.utils
+
+import org.jsoup.Jsoup
+import org.jsoup.nodes.Attribute
+import org.jsoup.nodes.Element
+import org.jsoup.safety.Whitelist
+
+/**
+ * Created by Allan Wang on 2017-08-10.
+ *
+ * Cleans an html string so that only the frame useful for debugging info is emitted.
+ *
+ * Applies [cleanText] to drop the text between tags, then [cleanJsoup] to filter tags and attributes
+ */
+fun String.cleanHtml(): String = cleanText().cleanJsoup()
+/** Empties every text run between a '>' and the next '<'; `[^<]+` spans newlines and, unlike `.+?`, cannot swallow a tag that directly follows another tag. */
+internal fun String.cleanText(): String = replace(Regex(">[^<]+<"), "><")
+/** Passes this html through [Jsoup.clean] together with a newly created [PrivacyWhitelist] and returns the result. */
+internal fun String.cleanJsoup(): String = PrivacyWhitelist().let { whitelist -> Jsoup.clean(this, whitelist) }
+/** [Whitelist] that reports every tag and attribute as safe unless it is listed below, and overwrites href values with "-". */
+class PrivacyWhitelist : Whitelist() {
+    // Attribute keys for which isSafeAttribute returns false
+    val blacklistAttrs = arrayOf("style", "aria-label", "rel")
+    val blacklistTags = arrayOf("body", "html", "head", "i", "b", "u", "style", "script",
+            "br", "p", "span", "ul", "ol", "li")
+
+    override fun isSafeAttribute(tagName: String, el: Element, attr: Attribute): Boolean {
+        val name = attr.key
+        if (name == "href") attr.setValue("-") // masks the link target in place; href is not blacklisted, so it is still reported safe
+        return !blacklistAttrs.contains(name)
+    }
+
+    // Tag names in blacklistTags are reported as unsafe; everything else is allowed
+    override fun isSafeTag(tag: String) = !blacklistTags.contains(tag)
+}