aboutsummaryrefslogtreecommitdiff
path: root/app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers
diff options
context:
space:
mode:
authorAllan Wang <me@allanwang.ca>2018-09-27 18:03:53 -0400
committerGitHub <noreply@github.com>2018-09-27 18:03:53 -0400
commit7460935f32748b10f6b3fedf9e77a373a9010d05 (patch)
treeeeaa5c880679198ee1a9dff885a1c7fc6c84207e /app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers
parent5db95f245101b221ca5669c4e4b5e526d941d68f (diff)
downloadfrost-7460935f32748b10f6b3fedf9e77a373a9010d05.tar.gz
frost-7460935f32748b10f6b3fedf9e77a373a9010d05.tar.bz2
frost-7460935f32748b10f6b3fedf9e77a373a9010d05.zip
Move parsers to facebook folder (#1109)
Diffstat (limited to 'app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers')
-rw-r--r--app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers/FrostParser.kt128
-rw-r--r--app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers/MessageParser.kt126
-rw-r--r--app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers/NotifParser.kt100
-rw-r--r--app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers/SearchParser.kt77
4 files changed, 431 insertions, 0 deletions
diff --git a/app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers/FrostParser.kt b/app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers/FrostParser.kt
new file mode 100644
index 00000000..5d023023
--- /dev/null
+++ b/app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers/FrostParser.kt
@@ -0,0 +1,128 @@
+package com.pitchedapps.frost.facebook.parsers
+
+import com.pitchedapps.frost.dbflow.CookieModel
+import com.pitchedapps.frost.facebook.FB_CSS_URL_MATCHER
+import com.pitchedapps.frost.facebook.formattedFbUrl
+import com.pitchedapps.frost.facebook.get
+import com.pitchedapps.frost.services.NotificationContent
+import com.pitchedapps.frost.utils.frostJsoup
+import org.jsoup.Jsoup
+import org.jsoup.nodes.Document
+import org.jsoup.nodes.Element
+import org.jsoup.select.Elements
+
+/**
+ * Created by Allan Wang on 2017-10-06.
+ *
+ * Interface for a given parser
+ * Use cases should be attached as delegates to objects that implement this interface
+ *
+ * In all cases, parsing will be done from a JSoup [Document]
+ * Variants accepting strings are also permitted, and they will be converted to documents accordingly
+ * The returned value must be nonnull if no parsing errors occurred, as null signifies a parse error
+ * If null really must be allowed as parsed data, use Optionals
+ *
+ * @param T type of the parsed data
+ */
+interface FrostParser<out T : Any> {
+
+ /**
+ * Name associated to parser
+ * Purely for display
+ * The implementations in this package assign the titleId of the matching FbItem
+ */
+ var nameRes: Int
+
+ /**
+ * Url to request from
+ */
+ val url: String
+
+ /**
+ * Call parsing with default implementation using cookie
+ *
+ * @return the parsed response, or null if parsing failed
+ */
+ fun parse(cookie: String?): ParseResponse<T>?
+
+ /**
+ * Call parsing with given document
+ *
+ * @return the parsed response, or null if parsing failed
+ */
+ fun parse(cookie: String?, document: Document): ParseResponse<T>?
+
+ /**
+ * Call parsing using jsoup to fetch from given url
+ *
+ * @return the parsed response, or null if parsing failed
+ */
+ fun parseFromUrl(cookie: String?, url: String): ParseResponse<T>?
+
+ /**
+ * Call parsing with given data
+ *
+ * @return the parsed response, or null if parsing failed
+ */
+ fun parseFromData(cookie: String?, text: String): ParseResponse<T>?
+
+}
+
+/**
+ * Modulus applied to System.currentTimeMillis() by the parsers when an entry's id
+ * cannot be extracted and a time based fallback id is used instead
+ */
+const val FALLBACK_TIME_MOD = 1000000
+
+/**
+ * Link extracted from a page
+ * [text] visible text of the anchor
+ * [href] value of the anchor's href attribute
+ */
+data class FrostLink(val text: String, val href: String)
+
+/**
+ * Result of a parse
+ * [cookie] the cookie that was passed to the parser
+ * [data] the parsed content
+ */
+data class ParseResponse<out T>(val cookie: String, val data: T) {
+
+    /**
+     * Multi-line representation: a header, the cookie, then the data's own string form
+     */
+    override fun toString(): String =
+        listOf("ParseResponse", "cookie: $cookie", "data:", "$data").joinToString("\n")
+}
+
+/**
+ * Implemented by parse results that can be converted into notification content
+ */
+interface ParseNotification {
+ /**
+ * Builds the notification content for this result
+ * The implementations in this package keep only entries flagged as unread
+ * and pass [data] on to every [NotificationContent]
+ */
+ fun getUnreadNotifications(data: CookieModel): List<NotificationContent>
+}
+
+/**
+ * Renders the list as a bracketed block labelled with [tag]
+ * The label and the closing bracket are prefixed with [indent] tabs,
+ * and every element is placed on its own line, one tab deeper
+ * Elements are rendered through their toString(); the output ends with a newline
+ */
+internal fun <T> List<T>.toJsonString(tag: String, indent: Int): String {
+    val tabs = "\t".repeat(indent)
+    val itemSeparator = "\n\t$tabs"
+    val items = joinToString(itemSeparator)
+    return "$tabs$tag: [$itemSeparator$items\n$tabs]\n"
+}
+
+/**
+ * Base implementation shared by the concrete parsers
+ * Every entry point ends up in [parseImpl]; a null cookie, or a null result from
+ * [textToDoc] or [parseImpl], yields a null response
+ *
+ * T should have a readable toString() function
+ * [redirectToText] dictates whether all data should be converted to text then back to document before parsing
+ * Parsers enabling it must override [textToDoc]
+ */
+internal abstract class FrostParserBase<out T : Any>(private val redirectToText: Boolean) : FrostParser<T> {
+
+    final override fun parse(cookie: String?) = parseFromUrl(cookie, url)
+
+    final override fun parseFromData(cookie: String?, text: String): ParseResponse<T>? {
+        cookie ?: return null
+        val doc = textToDoc(text) ?: return null
+        val data = parseImpl(doc) ?: return null
+        return ParseResponse(cookie, data)
+    }
+
+    final override fun parseFromUrl(cookie: String?, url: String): ParseResponse<T>? {
+        // Parsing without a cookie always results in null, so skip the fetch altogether
+        cookie ?: return null
+        return parse(cookie, frostJsoup(cookie, url))
+    }
+
+    override fun parse(cookie: String?, document: Document): ParseResponse<T>? {
+        cookie ?: return null
+        if (redirectToText)
+            return parseFromData(cookie, document.toString())
+        val data = parseImpl(document) ?: return null
+        return ParseResponse(cookie, data)
+    }
+
+    /**
+     * Extracts the data from the prepared document
+     * Returning null signifies a parse error
+     */
+    protected abstract fun parseImpl(doc: Document): T?
+
+    /**
+     * Attempts to find inner <i> element with some style containing a url
+     * Returns the formatted url, or null if nothing was found
+     */
+    protected fun Element.getInnerImgStyle() =
+        select("i.img[style*=url]").getStyleUrl()
+
+    /**
+     * Extracts the url from the style attribute of these elements
+     * Returns the formatted url, or null if the style does not match [FB_CSS_URL_MATCHER]
+     */
+    protected fun Elements.getStyleUrl() =
+        FB_CSS_URL_MATCHER.find(attr("style"))[1]?.formattedFbUrl
+
+    /**
+     * Converts raw text into the document handed to [parseImpl]
+     * The default only handles parsers without a text redirect;
+     * a parser that enables [redirectToText] without overriding this will throw
+     */
+    protected open fun textToDoc(text: String) = if (!redirectToText)
+        Jsoup.parse(text)
+    else
+        throw RuntimeException("${this::class.java.simpleName} requires text redirect but did not implement textToDoc")
+
+    /**
+     * Builds a [FrostLink] from the first anchor found in [element]
+     * Returns null if the element is null or holds no anchor
+     */
+    protected fun parseLink(element: Element?): FrostLink? {
+        val a = element?.getElementsByTag("a")?.first() ?: return null
+        return FrostLink(a.text(), a.attr("href"))
+    }
+} \ No newline at end of file
diff --git a/app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers/MessageParser.kt b/app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers/MessageParser.kt
new file mode 100644
index 00000000..f32c3452
--- /dev/null
+++ b/app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers/MessageParser.kt
@@ -0,0 +1,126 @@
+package com.pitchedapps.frost.facebook.parsers
+
+import com.pitchedapps.frost.dbflow.CookieModel
+import com.pitchedapps.frost.facebook.*
+import com.pitchedapps.frost.services.NotificationContent
+import com.pitchedapps.frost.utils.L
+import org.apache.commons.text.StringEscapeUtils
+import org.jsoup.Jsoup
+import org.jsoup.nodes.Document
+import org.jsoup.nodes.Element
+
+/**
+ * Created by Allan Wang on 2017-10-06.
+ *
+ * In Facebook, messages are passed through scripts and loaded into view via react afterwards
+ * We can parse out the content we want directly and load it ourselves
+ */
+object MessageParser : FrostParser<FrostMessages> by MessageParserImpl() {
+
+    /**
+     * Parses the messages url with [name] appended as the `q` query parameter
+     *
+     * NOTE(review): [name] is placed in the url as is; confirm whether it gets encoded elsewhere
+     */
+    fun queryUser(cookie: String?, name: String): ParseResponse<FrostMessages>? {
+        val queryUrl = "${FbItem.MESSAGES.url}/?q=$name"
+        return parseFromUrl(cookie, queryUrl)
+    }
+
+}
+
+/**
+ * Parsed content of the messages page
+ * [threads] the parsed threads
+ * [seeMore] link for older threads, if one was found
+ * [extraLinks] any further links that were parsed alongside the thread list
+ */
+data class FrostMessages(
+    val threads: List<FrostThread>,
+    val seeMore: FrostLink?,
+    val extraLinks: List<FrostLink>
+) : ParseNotification {
+
+    override fun toString(): String = buildString {
+        append("FrostMessages {\n")
+        append(threads.toJsonString("threads", 1))
+        append("\tsee more: $seeMore\n")
+        append(extraLinks.toJsonString("extra links", 1))
+        append("}")
+    }
+
+    /**
+     * Converts every unread thread into notification content; a missing thread content becomes an empty text
+     */
+    override fun getUnreadNotifications(data: CookieModel): List<NotificationContent> =
+        threads.filter { it.unread }.map { thread ->
+            NotificationContent(
+                data = data,
+                id = thread.id,
+                href = thread.url,
+                title = thread.title,
+                text = thread.content.orEmpty(),
+                timestamp = thread.time,
+                profileUrl = thread.img
+            )
+        }
+}
+
+/**
+ * [id] user/thread id, or current time fallback
+ * [img] parsed url for profile img
+ * [title] text of the first link in the thread entry
+ * [time] time of message, or -1 if it could not be parsed
+ * [url] link to thread
+ * [unread] true if thread is unread, false otherwise
+ * [content] optional string for thread
+ * [contentImgUrl] optional url of an image found inside the thread's snippet
+ */
+data class FrostThread(val id: Long,
+ val img: String?,
+ val title: String,
+ val time: Long,
+ val url: String,
+ val unread: Boolean,
+ val content: String?,
+ val contentImgUrl: String?)
+
+/**
+ * Parser for the messages page
+ * Uses the text redirect: the page text is unescaped and cut down to the
+ * thread list markup in [textToDoc] before it is parsed as a document
+ */
+private class MessageParserImpl : FrostParserBase<FrostMessages>(true) {
+
+    override var nameRes = FbItem.MESSAGES.titleId
+
+    override val url = FbItem.MESSAGES.url
+
+    /**
+     * Extracts the thread list markup from the raw page [text]
+     * Returns null if the thread list, or the end of the script holding it, cannot be found
+     */
+    override fun textToDoc(text: String): Document? {
+        val unescaped = StringEscapeUtils.unescapeEcmaScript(text)
+        // indexOf yields -1 when there is no match; index 0 is a valid hit
+        val begin = unescaped.indexOf("id=\"threadlist_rows\"")
+        if (begin < 0) {
+            L.d { "Threadlist not found" }
+            return null
+        }
+        val fromThreadList = unescaped.substring(begin)
+        val end = fromThreadList.indexOf("</script>")
+        if (end < 0) {
+            L.d { "Script tail not found" }
+            return null
+        }
+        val markup = fromThreadList.substring(0, end).substringBeforeLast("</div>")
+        // The cut starts at the id attribute, so the opening of the tag is restored here
+        return Jsoup.parseBodyFragment("<div $markup")
+    }
+
+    /**
+     * Reads the threads, the link to older threads and the links of the element following the thread list
+     * Returns null if the thread list element is absent
+     */
+    override fun parseImpl(doc: Document): FrostMessages? {
+        val threadList = doc.getElementById("threadlist_rows") ?: return null
+        val threads: List<FrostThread> = threadList
+            .getElementsByAttributeValueContaining("id", "thread_fbid_")
+            .mapNotNull(this::parseMessage)
+        val seeMore = parseLink(doc.getElementById("see_older_threads"))
+        // nextElementSibling is null when the thread list is the last element of its parent
+        val extraLinks = threadList.nextElementSibling()
+            ?.select("a")
+            ?.mapNotNull(this::parseLink)
+            ?: emptyList()
+        return FrostMessages(threads, seeMore, extraLinks)
+    }
+
+    /**
+     * Builds a thread from a single thread element
+     * Returns null if the element holds no link
+     */
+    private fun parseMessage(element: Element): FrostThread? {
+        val a = element.getElementsByTag("a").first() ?: return null
+        val abbr = element.getElementsByTag("abbr")
+        val epoch = FB_EPOCH_MATCHER.find(abbr.attr("data-store"))[1]?.toLongOrNull() ?: -1L
+        // Fall back to a time based id when none can be read from the element id
+        val id = FB_MESSAGE_NOTIF_ID_MATCHER.find(element.id())[1]?.toLongOrNull()
+            ?: System.currentTimeMillis() % FALLBACK_TIME_MOD
+        val snippet = element.select("span.snippet").firstOrNull()
+        return FrostThread(
+            id = id,
+            img = element.getInnerImgStyle(),
+            title = a.text(),
+            time = epoch,
+            url = a.attr("href").formattedFbUrl,
+            // Entries carrying the "acw" class are treated as read
+            unread = !element.hasClass("acw"),
+            content = snippet?.text()?.trim(),
+            contentImgUrl = snippet?.select("i[style*=url]")?.getStyleUrl()
+        )
+    }
+
+}
diff --git a/app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers/NotifParser.kt b/app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers/NotifParser.kt
new file mode 100644
index 00000000..03b913c7
--- /dev/null
+++ b/app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers/NotifParser.kt
@@ -0,0 +1,100 @@
+package com.pitchedapps.frost.facebook.parsers
+
+import com.pitchedapps.frost.dbflow.CookieModel
+import com.pitchedapps.frost.facebook.*
+import com.pitchedapps.frost.services.NotificationContent
+import org.jsoup.nodes.Document
+import org.jsoup.nodes.Element
+
+/**
+ * Created by Allan Wang on 2017-12-25.
+ *
+ * Parser for the notifications page
+ * All parsing is delegated to NotifParserImpl
+ */
+object NotifParser : FrostParser<FrostNotifs> by NotifParserImpl()
+
+/**
+ * Parsed content of the notifications page
+ * [notifs] the parsed notifications
+ * [seeMore] link for further notifications, if one was found
+ */
+data class FrostNotifs(
+    val notifs: List<FrostNotif>,
+    val seeMore: FrostLink?
+) : ParseNotification {
+
+    override fun toString(): String = buildString {
+        append("FrostNotifs {\n")
+        append(notifs.toJsonString("notifs", 1))
+        append("\tsee more: $seeMore\n")
+        append("}")
+    }
+
+    /**
+     * Converts every unread notification into notification content; no title is supplied
+     */
+    override fun getUnreadNotifications(data: CookieModel): List<NotificationContent> =
+        notifs.filter { it.unread }.map { notif ->
+            NotificationContent(
+                data = data,
+                id = notif.id,
+                href = notif.url,
+                title = null,
+                text = notif.content,
+                timestamp = notif.time,
+                profileUrl = notif.img
+            )
+        }
+}
+
+/**
+ * [id] notif id, or current time fallback
+ * [img] parsed url for profile img
+ * [time] time of notification, or -1 if it could not be parsed
+ * [url] formatted link taken from the notification's first anchor
+ * [unread] true if notification is unread, false otherwise
+ * [content] text of the notification, with the trailing time string removed
+ * [timeString] text version of time from Facebook
+ * [thumbnailUrl] optional thumbnail url if existent
+ */
+data class FrostNotif(val id: Long,
+ val img: String?,
+ val time: Long,
+ val url: String,
+ val unread: Boolean,
+ val content: String,
+ val timeString: String,
+ val thumbnailUrl: String?)
+
+/**
+ * Parser for the notifications page; works on the document directly, without a text redirect
+ */
+private class NotifParserImpl : FrostParserBase<FrostNotifs>(false) {
+
+    override var nameRes = FbItem.NOTIFICATIONS.titleId
+
+    override val url = FbItem.NOTIFICATIONS.url
+
+    /**
+     * Reads every notification entry below the notifications list, plus the link for more notifications
+     * Returns null if the list element is absent
+     */
+    override fun parseImpl(doc: Document): FrostNotifs? {
+        val listRoot = doc.getElementById("notifications_list") ?: return null
+        val entries = listRoot.getElementsByAttributeValueContaining("id", "list_notif_")
+        val moreAnchor = doc.getElementsByAttributeValue("href", "/notifications.php?more").first()
+        return FrostNotifs(entries.mapNotNull { parseNotif(it) }, parseLink(moreAnchor))
+    }
+
+    /**
+     * Builds a notification from a single entry element
+     * Returns null if the element holds no link
+     */
+    private fun parseNotif(element: Element): FrostNotif? {
+        val anchor = element.getElementsByTag("a").first() ?: return null
+        val abbr = element.getElementsByTag("abbr")
+        val timeText = abbr.text()
+        val epoch = FB_EPOCH_MATCHER.find(abbr.attr("data-store"))[1]?.toLongOrNull() ?: -1L
+        // Fall back to a time based id when none can be read from the element id
+        val notifId = FB_NOTIF_ID_MATCHER.find(element.id())[1]?.toLongOrNull()
+            ?: System.currentTimeMillis() % FALLBACK_TIME_MOD
+        // \u00a0 is &nbsp;; the anchor text ends with the time string, which is kept separately
+        val body = anchor.text().replace("\u00a0", " ").removeSuffix(timeText).trim()
+        // An empty src counts as no thumbnail
+        val thumbnail = element.selectFirst("img.thumbnail")?.attr("src")?.takeIf { it.isNotEmpty() }
+        return FrostNotif(
+            id = notifId,
+            img = element.getInnerImgStyle(),
+            time = epoch,
+            url = anchor.attr("href").formattedFbUrl,
+            unread = !element.hasClass("acw"),
+            content = body,
+            timeString = timeText,
+            thumbnailUrl = thumbnail
+        )
+    }
+
+}
diff --git a/app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers/SearchParser.kt b/app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers/SearchParser.kt
new file mode 100644
index 00000000..d3367514
--- /dev/null
+++ b/app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers/SearchParser.kt
@@ -0,0 +1,77 @@
+package com.pitchedapps.frost.facebook.parsers
+
+import ca.allanwang.kau.searchview.SearchItem
+import com.pitchedapps.frost.facebook.FbItem
+import com.pitchedapps.frost.facebook.formattedFbUrl
+import com.pitchedapps.frost.facebook.parsers.FrostSearch.Companion.create
+import com.pitchedapps.frost.utils.L
+import org.jsoup.nodes.Document
+import org.jsoup.nodes.Element
+
+/**
+ * Created by Allan Wang on 2017-10-09.
+ */
+object SearchParser : FrostParser<FrostSearches> by SearchParserImpl() {
+
+    /**
+     * Parses the search url with [input] appended as the `q` query parameter
+     * A blank [input] is replaced with "a"
+     */
+    fun query(cookie: String?, input: String): ParseResponse<FrostSearches>? {
+        val term = if (input.isBlank()) "a" else input
+        val url = "${FbItem._SEARCH.url}?q=$term"
+        L._i { "Search Query $url" }
+        return parseFromUrl(cookie, url)
+    }
+}
+
+/**
+ * Keys for search categories, each holding its raw string [key]
+ * NOTE(review): not referenced by the parsers in this package; confirm where the keys are consumed
+ */
+enum class SearchKeys(val key: String) {
+ USERS("keywords_users"),
+ EVENTS("keywords_events")
+}
+
+/**
+ * Wrapper around the list of parsed search [results]
+ */
+data class FrostSearches(val results: List<FrostSearch>) {
+
+    override fun toString(): String {
+        val body = results.toJsonString("results", 1)
+        return "FrostSearches {\n$body}"
+    }
+}
+
+/**
+ * As far as I'm aware, all links are independent, so the queries don't matter
+ * A lot of it is tracking information, which I'll strip away
+ *
+ * Note that it's best to create search results from [create]
+ */
+data class FrostSearch(val href: String, val title: String, val description: String?) {
+
+    fun toSearchItem() = SearchItem(href, title, description)
+
+    companion object {
+        /**
+         * Creates a search result whose [href] has its query string (everything from the first '?') removed
+         *
+         * [title] and [description] are stored as is
+         * They are page text rather than format patterns, so they must not go through String.format:
+         * a stray '%' could throw an IllegalFormatException, and "%%" or "%n" would be rewritten
+         */
+        fun create(href: String, title: String, description: String?) = FrostSearch(
+            href = href.substringBefore('?'),
+            title = title,
+            description = description
+        )
+    }
+}
+
+/**
+ * Parser for the search page; works on the document directly, without a text redirect
+ */
+private class SearchParserImpl : FrostParserBase<FrostSearches>(false) {
+
+    override var nameRes = FbItem._SEARCH.titleId
+
+    override val url = "${FbItem._SEARCH.url}?q=a"
+
+    /**
+     * Reads every touchable link with text from the results container, or from the root element as a fallback
+     * Results with a blank title are dropped; returns null if neither element exists
+     */
+    override fun parseImpl(doc: Document): FrostSearches? {
+        val container: Element = doc.getElementById("BrowseResultsContainer")
+            ?: doc.getElementById("root")
+            ?: return null
+        // Removed [data-store*=result_id] from the selector
+        val results = container.select("a.touchable[href]")
+            .filter { it.hasText() }
+            .map { link ->
+                val title = link.select("._uoi").first()?.text() ?: ""
+                val description = link.select("._1tcc").first()?.text()
+                FrostSearch.create(link.attr("href").formattedFbUrl, title, description)
+            }
+            .filter { it.title.isNotBlank() }
+        return FrostSearches(results)
+    }
+
+} \ No newline at end of file