diff options
Diffstat (limited to 'app/src/main/kotlin/com/pitchedapps/frost/utils/JsoupCleaner.kt')
-rw-r--r-- | app/src/main/kotlin/com/pitchedapps/frost/utils/JsoupCleaner.kt | 34 |
1 files changed, 34 insertions, 0 deletions
package com.pitchedapps.frost.utils

import org.jsoup.Jsoup
import org.jsoup.nodes.Attribute
import org.jsoup.nodes.Element
import org.jsoup.safety.Whitelist

/**
 * Matches a `>` followed by one or more characters (newlines included, because of the
 * inline `(?s)` flag) up to the nearest following `<`.
 *
 * Compiled once here instead of on every [cleanText] call.
 */
private val TEXT_BETWEEN_TAGS = Regex(">(?s).+?<")

/**
 * Created by Allan Wang on 2017-08-10.
 *
 * Parses html with Jsoup and cleans the data, emitting just the frame containing debugging info.
 *
 * Runs [cleanText] first to remove the text between tags, then [cleanJsoup] to remove
 * unnecessary nodes and attributes.
 *
 * @return the cleaned html string
 */
fun String.cleanHtml(): String = cleanText().cleanJsoup()

/**
 * Replaces every match of `>…<` (any non-empty content between a `>` and the next `<`)
 * with `><`, leaving the surrounding markup in place.
 *
 * Adjacent tags that are already written as `><` are untouched, since the pattern
 * requires at least one character in between.
 */
internal fun String.cleanText(): String = replace(TEXT_BETWEEN_TAGS, "><")

/**
 * Passes the receiver through [Jsoup.clean] using a fresh [PrivacyWhitelist].
 */
internal fun String.cleanJsoup(): String = Jsoup.clean(this, PrivacyWhitelist())

/**
 * [Whitelist] that accepts every tag and attribute except the ones named in
 * [blacklistTags] and [blacklistAttrs], and masks the value of every `href` attribute.
 */
class PrivacyWhitelist : Whitelist() {

    /** Attribute keys that are always rejected by [isSafeAttribute]. */
    val blacklistAttrs = arrayOf("style", "aria-label", "rel")

    /** Tag names that are always rejected by [isSafeTag]. */
    val blacklistTags = arrayOf(
        "body", "html", "head", "i", "b", "u", "style", "script",
        "br", "p", "span", "ul", "ol", "li"
    )

    /**
     * Accepts any attribute whose key is not in [blacklistAttrs].
     *
     * Side effect: when the key is `href`, the attribute's value is overwritten with `"-"`
     * before the check, so the attribute is kept but its original target is not.
     *
     * @param tagName name of the tag holding the attribute (not used by this check)
     * @param el element holding the attribute (not used by this check)
     * @param attr attribute being tested; mutated in place when its key is `href`
     * @return `true` if the attribute key is not blacklisted
     */
    override fun isSafeAttribute(tagName: String, el: Element, attr: Attribute): Boolean {
        val key = attr.key
        if (key == "href") attr.setValue("-")
        return key !in blacklistAttrs
    }

    /**
     * Accepts any tag whose name is not in [blacklistTags].
     *
     * @param tag name of the tag being tested
     * @return `true` if the tag is not blacklisted
     */
    override fun isSafeTag(tag: String): Boolean = tag !in blacklistTags
}