diff options
author | Allan Wang <me@allanwang.ca> | 2018-09-27 18:03:53 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-09-27 18:03:53 -0400 |
commit | 7460935f32748b10f6b3fedf9e77a373a9010d05 (patch) | |
tree | eeaa5c880679198ee1a9dff885a1c7fc6c84207e /app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers | |
parent | 5db95f245101b221ca5669c4e4b5e526d941d68f (diff) | |
download | frost-7460935f32748b10f6b3fedf9e77a373a9010d05.tar.gz frost-7460935f32748b10f6b3fedf9e77a373a9010d05.tar.bz2 frost-7460935f32748b10f6b3fedf9e77a373a9010d05.zip |
Move parsers to facebook folder (#1109)
Diffstat (limited to 'app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers')
4 files changed, 431 insertions, 0 deletions
// File: app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers/FrostParser.kt
package com.pitchedapps.frost.facebook.parsers

import com.pitchedapps.frost.dbflow.CookieModel
import com.pitchedapps.frost.facebook.FB_CSS_URL_MATCHER
import com.pitchedapps.frost.facebook.formattedFbUrl
import com.pitchedapps.frost.facebook.get
import com.pitchedapps.frost.services.NotificationContent
import com.pitchedapps.frost.utils.frostJsoup
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
import org.jsoup.select.Elements

/**
 * Created by Allan Wang on 2017-10-06.
 *
 * Interface for a given parser.
 * Use cases should be attached as delegates to objects that implement this interface.
 *
 * In all cases, parsing is done from a JSoup document.
 * Variants accepting strings are also permitted; they are converted to documents accordingly.
 * The return value must be nonnull if no parsing error occurred, as null signifies a parse error.
 * If null really must be allowed as data, use Optionals.
 */
interface FrostParser<out T : Any> {

    /**
     * Name associated to parser.
     * Purely for display.
     */
    var nameRes: Int

    /**
     * Url to request from
     */
    val url: String

    /**
     * Call parsing with default implementation using cookie
     */
    fun parse(cookie: String?): ParseResponse<T>?

    /**
     * Call parsing with given document
     */
    fun parse(cookie: String?, document: Document): ParseResponse<T>?

    /**
     * Call parsing using jsoup to fetch from given url
     */
    fun parseFromUrl(cookie: String?, url: String): ParseResponse<T>?

    /**
     * Call parsing with given data
     */
    fun parseFromData(cookie: String?, text: String): ParseResponse<T>?
}

/**
 * Modulus applied to the current time when a parsed item has no id of its own,
 * so that the fallback id stays below this bound.
 */
const val FALLBACK_TIME_MOD = 1000000

/**
 * Anchor extracted from a page: its visible [text] and raw [href] attribute.
 */
data class FrostLink(val text: String, val href: String)

/**
 * Successful parse result: the [cookie] used for the request and the parsed [data].
 */
data class ParseResponse<out T>(val cookie: String, val data: T) {
    override fun toString() = "ParseResponse\ncookie: $cookie\ndata:\n$data"
}

/**
 * Implemented by parse results that can be turned into notification contents.
 */
interface ParseNotification {
    fun getUnreadNotifications(data: CookieModel): List<NotificationContent>
}

/**
 * Renders the list as a tab-indented, bracketed section labelled [tag], one element per line.
 * [indent] is the number of tabs placed before the label; elements get one extra tab.
 */
internal fun <T> List<T>.toJsonString(tag: String, indent: Int): String {
    val tabs = "\t".repeat(indent)
    val rows = joinToString("\n\t$tabs")
    return "$tabs$tag: [\n\t$tabs$rows\n$tabs]\n"
}

/**
 * Base implementation shared by the concrete parsers.
 *
 * T should have a readable toString() function.
 * [redirectToText] dictates whether all data should be converted to text,
 * then back to a document through [textToDoc], before parsing.
 */
internal abstract class FrostParserBase<out T : Any>(private val redirectToText: Boolean) : FrostParser<T> {

    final override fun parse(cookie: String?) = parseFromUrl(cookie, url)

    final override fun parseFromData(cookie: String?, text: String): ParseResponse<T>? {
        cookie ?: return null
        val doc = textToDoc(text) ?: return null
        val data = parseImpl(doc) ?: return null
        return ParseResponse(cookie, data)
    }

    final override fun parseFromUrl(cookie: String?, url: String): ParseResponse<T>? {
        // Without a cookie parse() returns null regardless, so skip the network fetch entirely
        cookie ?: return null
        return parse(cookie, frostJsoup(cookie, url))
    }

    override fun parse(cookie: String?, document: Document): ParseResponse<T>? {
        cookie ?: return null
        if (redirectToText)
            return parseFromData(cookie, document.toString())
        val data = parseImpl(document) ?: return null
        return ParseResponse(cookie, data)
    }

    /**
     * Extracts the data from [doc]; null signifies that the document could not be parsed
     */
    protected abstract fun parseImpl(doc: Document): T?

    /**
     * Attempts to find an inner <i> element with some style containing a url.
     * Returns the formatted url, or null if nothing was found.
     */
    protected fun Element.getInnerImgStyle() =
        select("i.img[style*=url]").getStyleUrl()

    /**
     * Matches [FB_CSS_URL_MATCHER] against the style attribute of these elements
     * and returns the first capture group as a formatted url, or null if there is no match.
     */
    protected fun Elements.getStyleUrl() =
        FB_CSS_URL_MATCHER.find(attr("style"))[1]?.formattedFbUrl

    /**
     * Converts raw [text] into the document handed to [parseImpl]; null aborts the parse.
     *
     * The default only supports parsers that do not redirect to text;
     * parsers created with redirectToText = true must override this, otherwise it throws.
     */
    protected open fun textToDoc(text: String): Document? =
        if (!redirectToText)
            Jsoup.parse(text)
        else
            throw RuntimeException("${this::class.java.simpleName} requires text redirect but did not implement textToDoc")

    /**
     * Builds a [FrostLink] from the first anchor found within [element] (the element itself included).
     * Returns null if [element] is null or contains no anchor.
     */
    protected fun parseLink(element: Element?): FrostLink? {
        val a = element?.getElementsByTag("a")?.first() ?: return null
        return FrostLink(a.text(), a.attr("href"))
    }
}
// File: app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers/MessageParser.kt
package com.pitchedapps.frost.facebook.parsers

import com.pitchedapps.frost.dbflow.CookieModel
import com.pitchedapps.frost.facebook.*
import com.pitchedapps.frost.services.NotificationContent
import com.pitchedapps.frost.utils.L
import java.net.URLEncoder
import org.apache.commons.text.StringEscapeUtils
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element

/**
 * Created by Allan Wang on 2017-10-06.
 *
 * In Facebook, messages are passed through scripts and loaded into view via react afterwards.
 * We can parse out the content we want directly and load it ourselves.
 */
object MessageParser : FrostParser<FrostMessages> by MessageParserImpl() {

    /**
     * Parses the messages page queried with [name].
     * [name] is url encoded so that characters such as '&', '#' or spaces stay inside the q parameter.
     */
    fun queryUser(cookie: String?, name: String) =
        parseFromUrl(cookie, "${FbItem.MESSAGES.url}/?q=${URLEncoder.encode(name, "UTF-8")}")
}

/**
 * Parsed message page.
 *
 * [threads] threads found in the thread list
 * [seeMore] link parsed from the "see_older_threads" element, if present
 * [extraLinks] links found in the element following the thread list
 */
data class FrostMessages(
    val threads: List<FrostThread>,
    val seeMore: FrostLink?,
    val extraLinks: List<FrostLink>
) : ParseNotification {

    override fun toString() = buildString {
        append("FrostMessages {\n")
        append(threads.toJsonString("threads", 1))
        append("\tsee more: $seeMore\n")
        append(extraLinks.toJsonString("extra links", 1))
        append("}")
    }

    /**
     * Maps every unread thread to a notification content bound to [data]
     */
    override fun getUnreadNotifications(data: CookieModel) =
        threads.filter { it.unread }.map { thread ->
            NotificationContent(
                data = data,
                id = thread.id,
                href = thread.url,
                title = thread.title,
                text = thread.content ?: "",
                timestamp = thread.time,
                profileUrl = thread.img
            )
        }
}

/**
 * [id] user/thread id, or current time fallback
 * [img] parsed url for profile img
 * [title] text of the first link in the thread row
 * [time] epoch parsed from the row's abbr element, or -1 if none was found
 * [url] link to thread
 * [unread] true if the thread is unread, false otherwise
 * [content] optional snippet text for thread
 * [contentImgUrl] optional url parsed from a styled icon inside the snippet
 */
data class FrostThread(
    val id: Long,
    val img: String?,
    val title: String,
    val time: Long,
    val url: String,
    val unread: Boolean,
    val content: String?,
    val contentImgUrl: String?
)

private class MessageParserImpl : FrostParserBase<FrostMessages>(true) {

    override var nameRes = FbItem.MESSAGES.titleId

    override val url = FbItem.MESSAGES.url

    /**
     * Cuts the thread list markup out of the (script embedded) page text
     * and parses that fragment on its own.
     */
    override fun textToDoc(text: String): Document? {
        val unescaped = StringEscapeUtils.unescapeEcmaScript(text)
        // indexOf reports a missing match with -1; index 0 is a valid position
        val begin = unescaped.indexOf("id=\"threadlist_rows\"")
        if (begin < 0) {
            L.d { "Threadlist not found" }
            return null
        }
        val fromThreadList = unescaped.substring(begin)
        val end = fromThreadList.indexOf("</script>")
        if (end < 0) {
            L.d { "Script tail not found" }
            return null
        }
        val fragment = fromThreadList.substring(0, end).substringBeforeLast("</div>")
        // The cut starts at the id attribute, so the opening tag has to be restored
        return Jsoup.parseBodyFragment("<div $fragment")
    }

    override fun parseImpl(doc: Document): FrostMessages? {
        val threadList = doc.getElementById("threadlist_rows") ?: return null
        val threads: List<FrostThread> = threadList
            .getElementsByAttributeValueContaining("id", "thread_fbid_")
            .mapNotNull(this::parseMessage)
        val seeMore = parseLink(doc.getElementById("see_older_threads"))
        // nextElementSibling() is null when the thread list is the last element of its parent
        val extraLinks = threadList.nextElementSibling()
            ?.select("a")
            ?.mapNotNull { parseLink(it) }
            ?: emptyList()
        return FrostMessages(threads, seeMore, extraLinks)
    }

    /**
     * Converts a single thread row; rows without any link are skipped (null)
     */
    private fun parseMessage(element: Element): FrostThread? {
        val a = element.getElementsByTag("a").first() ?: return null
        val abbr = element.getElementsByTag("abbr")
        val epoch = FB_EPOCH_MATCHER.find(abbr.attr("data-store"))[1]?.toLongOrNull() ?: -1L
        // fetch id, falling back to a value derived from the current time
        val id = FB_MESSAGE_NOTIF_ID_MATCHER.find(element.id())[1]?.toLongOrNull()
            ?: System.currentTimeMillis() % FALLBACK_TIME_MOD
        val snippet = element.select("span.snippet").firstOrNull()
        val content = snippet?.text()?.trim()
        val contentImg = snippet?.select("i[style*=url]")?.getStyleUrl()
        val img = element.getInnerImgStyle()
        return FrostThread(
            id = id,
            img = img,
            title = a.text(),
            time = epoch,
            url = a.attr("href").formattedFbUrl,
            unread = !element.hasClass("acw"),
            content = content,
            contentImgUrl = contentImg
        )
    }
}

// File: app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers/NotifParser.kt
package com.pitchedapps.frost.facebook.parsers

import com.pitchedapps.frost.dbflow.CookieModel
import com.pitchedapps.frost.facebook.*
import com.pitchedapps.frost.services.NotificationContent
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element

/**
 * Created by Allan Wang on 2017-12-25.
 */
object NotifParser : FrostParser<FrostNotifs> by NotifParserImpl()

/**
 * Parsed notification page.
 *
 * [notifs] notifications found in the notification list
 * [seeMore] link to further notifications, if present
 */
data class FrostNotifs(
    val notifs: List<FrostNotif>,
    val seeMore: FrostLink?
) : ParseNotification {

    override fun toString() = buildString {
        append("FrostNotifs {\n")
        append(notifs.toJsonString("notifs", 1))
        append("\tsee more: $seeMore\n")
        append("}")
    }

    /**
     * Maps every unread notification to a notification content bound to [data]
     */
    override fun getUnreadNotifications(data: CookieModel) =
        notifs.filter { it.unread }.map { notif ->
            NotificationContent(
                data = data,
                id = notif.id,
                href = notif.url,
                title = null,
                text = notif.content,
                timestamp = notif.time,
                profileUrl = notif.img
            )
        }
}

/**
 * [id] notif id, or current time fallback
 * [img] parsed url for profile img
 * [time] epoch parsed from the row's abbr element, or -1 if none was found
 * [url] link the notification points to
 * [unread] true if the notification is unread, false otherwise
 * [content] notification text with the trailing time text removed
 * [timeString] text version of time from Facebook
 * [thumbnailUrl] optional thumbnail url if existent
 */
data class FrostNotif(
    val id: Long,
    val img: String?,
    val time: Long,
    val url: String,
    val unread: Boolean,
    val content: String,
    val timeString: String,
    val thumbnailUrl: String?
)

private class NotifParserImpl : FrostParserBase<FrostNotifs>(false) {

    override var nameRes = FbItem.NOTIFICATIONS.titleId

    override val url = FbItem.NOTIFICATIONS.url

    override fun parseImpl(doc: Document): FrostNotifs? {
        val notificationList = doc.getElementById("notifications_list") ?: return null
        val notifications = notificationList
            .getElementsByAttributeValueContaining("id", "list_notif_")
            .mapNotNull(this::parseNotif)
        val seeMore = parseLink(doc.getElementsByAttributeValue("href", "/notifications.php?more").first())
        return FrostNotifs(notifications, seeMore)
    }

    /**
     * Converts a single notification row; rows without any link are skipped (null)
     */
    private fun parseNotif(element: Element): FrostNotif? {
        val a = element.getElementsByTag("a").first() ?: return null
        val abbr = element.getElementsByTag("abbr")
        val epoch = FB_EPOCH_MATCHER.find(abbr.attr("data-store"))[1]?.toLongOrNull() ?: -1L
        // fetch id, falling back to a value derived from the current time
        val id = FB_NOTIF_ID_MATCHER.find(element.id())[1]?.toLongOrNull()
            ?: System.currentTimeMillis() % FALLBACK_TIME_MOD
        val img = element.getInnerImgStyle()
        val timeString = abbr.text()
        // swap non-breaking spaces for plain ones, then drop the time text that trails the link text
        val content = a.text().replace("\u00a0", " ").removeSuffix(timeString).trim()
        val thumbnail = element.selectFirst("img.thumbnail")?.attr("src")
        return FrostNotif(
            id = id,
            img = img,
            time = epoch,
            url = a.attr("href").formattedFbUrl,
            unread = !element.hasClass("acw"),
            content = content,
            timeString = timeString,
            thumbnailUrl = thumbnail?.takeIf { it.isNotEmpty() }
        )
    }
}

// File: app/src/main/kotlin/com/pitchedapps/frost/facebook/parsers/SearchParser.kt
package com.pitchedapps.frost.facebook.parsers

import ca.allanwang.kau.searchview.SearchItem
import com.pitchedapps.frost.facebook.FbItem
import com.pitchedapps.frost.facebook.formattedFbUrl
import com.pitchedapps.frost.facebook.parsers.FrostSearch.Companion.create
import com.pitchedapps.frost.utils.L
import java.net.URLEncoder
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element

/**
 * Created by Allan Wang on 2017-10-09.
 */
object SearchParser : FrostParser<FrostSearches> by SearchParserImpl() {

    /**
     * Parses the search page for [input]; a blank input is replaced by the query "a".
     * The query is url encoded so that characters such as '&', '#' or spaces stay inside the q parameter.
     */
    fun query(cookie: String?, input: String): ParseResponse<FrostSearches>? {
        val term = if (input.isNotBlank()) input else "a"
        val url = "${FbItem._SEARCH.url}?q=${URLEncoder.encode(term, "UTF-8")}"
        L._i { "Search Query $url" }
        return parseFromUrl(cookie, url)
    }
}

enum class SearchKeys(val key: String) {
    USERS("keywords_users"),
    EVENTS("keywords_events")
}

/**
 * Parsed search page holding every usable result
 */
data class FrostSearches(val results: List<FrostSearch>) {

    override fun toString() = buildString {
        append("FrostSearches {\n")
        append(results.toJsonString("results", 1))
        append("}")
    }
}

/**
 * As far as I'm aware, all links are independent, so the queries don't matter.
 * A lot of it is tracking information, which I'll strip away.
 * Other text items are formatted for safety.
 *
 * Note that it's best to create search results from [create]
 */
data class FrostSearch(val href: String, val title: String, val description: String?) {

    fun toSearchItem() = SearchItem(href, title, description)

    companion object {
        /**
         * Builds a result whose [href] has everything from the first '?' removed.
         *
         * NOTE(review): format() is called without arguments. If this resolves to the stdlib
         * String.format, any text containing '%' is treated as a format pattern and may throw;
         * confirm whether a project extension is intended here.
         */
        fun create(href: String, title: String, description: String?) = FrostSearch(
            href.substringBefore("?"),
            title.format(),
            description?.format()
        )
    }
}

private class SearchParserImpl : FrostParserBase<FrostSearches>(false) {

    override var nameRes = FbItem._SEARCH.titleId

    override val url = "${FbItem._SEARCH.url}?q=a"

    override fun parseImpl(doc: Document): FrostSearches? {
        val container: Element = doc.getElementById("BrowseResultsContainer")
            ?: doc.getElementById("root")
            ?: return null
        /**
         *
         * Removed [data-store*=result_id]
         */
        val results = container.select("a.touchable[href]")
            .filter(Element::hasText)
            .map { link ->
                FrostSearch.create(
                    link.attr("href").formattedFbUrl,
                    link.select("._uoi").first()?.text() ?: "",
                    link.select("._1tcc").first()?.text()
                )
            }
            // results without a title cannot be displayed
            .filter { it.title.isNotBlank() }
        return FrostSearches(results)
    }
}
\ No newline at end of file |