diff options
Diffstat (limited to 'app/src/main/kotlin/com/pitchedapps/frost/parsers')
4 files changed, 256 insertions, 109 deletions
diff --git a/app/src/main/kotlin/com/pitchedapps/frost/parsers/FrostParser.kt b/app/src/main/kotlin/com/pitchedapps/frost/parsers/FrostParser.kt index 186633e5..016f33e8 100644 --- a/app/src/main/kotlin/com/pitchedapps/frost/parsers/FrostParser.kt +++ b/app/src/main/kotlin/com/pitchedapps/frost/parsers/FrostParser.kt @@ -1,6 +1,14 @@ package com.pitchedapps.frost.parsers +import com.pitchedapps.frost.dbflow.CookieModel +import com.pitchedapps.frost.facebook.FB_CSS_URL_MATCHER +import com.pitchedapps.frost.facebook.formattedFbUrl +import com.pitchedapps.frost.facebook.get +import com.pitchedapps.frost.services.NotificationContent +import com.pitchedapps.frost.utils.frostJsoup +import org.jsoup.Jsoup import org.jsoup.nodes.Document +import org.jsoup.nodes.Element /** * Created by Allan Wang on 2017-10-06. @@ -13,80 +21,88 @@ import org.jsoup.nodes.Document * The return type must be nonnull if no parsing errors occurred, as null signifies a parse error * If null really must be allowed, use Optionals */ -interface FrostParser<T> { +interface FrostParser<out T : Any> { + /** - * Extracts data from the JSoup document - * In some cases, the document can be created directly from a connection - * In other times, it needs to be created from scripts, which otherwise - * won't be parsed + * Url to request from */ - fun parse(doc: Document): T? + val url: String /** - * Parse a String input + * Call parsing with default implementation using cookie */ - fun parse(text: String?): T? + fun parse(cookie: String?): ParseResponse<T>? /** - * Take in doc and emit debug output + * Call parsing with given document */ - fun debug(doc: Document): String + fun parse(cookie: String?, document: Document): ParseResponse<T>? /** - * Attempts to parse input and emit a debugger + * Call parsing with given data */ - fun debug(text: String?): String + fun parseFromData(cookie: String?, text: String): ParseResponse<T>? + +} + +data class FrostLink(val text: String, val href: String) + +data class ParseResponse<out T>(val cookie: String, val data: T) { + override fun toString() = "ParseResponse\ncookie: $cookie\ndata:\n$data" } -internal abstract class FrostParserBase<T> : FrostParser<T> { +interface ParseNotification { + fun getUnreadNotifications(data: CookieModel): List<NotificationContent> +} + +internal fun <T> List<T>.toJsonString(tag: String, indent: Int) = StringBuilder().apply { + val tabs = "\t".repeat(indent) + append("$tabs$tag: [\n\t$tabs") + append(this@toJsonString.joinToString("\n\t$tabs")) + append("\n$tabs]\n") +}.toString() + +/** + * T should have a readable toString() function + * [redirectToText] dictates whether all data should be converted to text then back to document before parsing + */ +internal abstract class FrostParserBase<out T : Any>(private val redirectToText: Boolean) : FrostParser<T> { + + override final fun parse(cookie: String?) = parse(cookie, frostJsoup(cookie, url)) - override final fun parse(text: String?): T? { - text ?: return null + override final fun parseFromData(cookie: String?, text: String): ParseResponse<T>? { + cookie ?: return null val doc = textToDoc(text) ?: return null - return parse(doc) + val data = parseImpl(doc) ?: return null + return ParseResponse(cookie, data) } - protected abstract fun textToDoc(text: String): Document? - - override fun debug(text: String?): String { - val result = mutableListOf<String>() - result.add("Testing parser for ${this::class.java.simpleName}") - if (text == null) { - result.add("Null text input") - return result.joinToString("\n") - } - val doc = textToDoc(text) - if (doc == null) { - result.add("Null document from text") - return result.joinToString("\n") - } - return debug(doc, result) + override fun parse(cookie: String?, document: Document): ParseResponse<T>? { + cookie ?: return null + if (redirectToText) + return parseFromData(cookie, document.toString()) + val data = parseImpl(document) ?: return null + return ParseResponse(cookie, data) } - override final fun debug(doc: Document): String { - val result = mutableListOf<String>() - result.add("Testing parser for ${this::class.java.simpleName}") - return debug(doc, result) - } + protected abstract fun parseImpl(doc: Document): T? - private fun debug(doc: Document, result: MutableList<String>): String { - val output = parse(doc) - if (output == null) { - result.add("Output is null") - return result.joinToString("\n") - } else { - result.add("Output is not null") - } - debugImpl(output, result) - return result.joinToString("\n") - } + // protected abstract fun parse(doc: Document): T? - protected abstract fun debugImpl(data: T, result: MutableList<String>) -} + /** + * Attempts to find inner <i> element with some style containing a url + * Returns the formatted url, or an empty string if nothing was found + */ + protected fun Element.getInnerImgStyle() = + FB_CSS_URL_MATCHER.find(select("i.img[style*=url]").attr("style"))[1]?.formattedFbUrl ?: "" -object FrostRegex { - val epoch = Regex(":([0-9]+)") - val notifId = Regex("notif_id\":([0-9]+)") - val messageNotifId = Regex("thread_fbid_([0-9]+)") - val profilePicture = Regex("url\\(\"(.*?)\"\\)") + protected open fun textToDoc(text: String) = if (!redirectToText) + Jsoup.parse(text) + else + throw RuntimeException("${this::class.java.simpleName} requires text redirect but did not implement textToDoc") + + protected fun parseLink(element: Element?): FrostLink? { + val a = element?.getElementsByTag("a")?.first() ?: return null + return FrostLink(a.text(), a.attr("href")) + } }
\ No newline at end of file diff --git a/app/src/main/kotlin/com/pitchedapps/frost/parsers/MessageParser.kt b/app/src/main/kotlin/com/pitchedapps/frost/parsers/MessageParser.kt index 9430407d..9d4a2193 100644 --- a/app/src/main/kotlin/com/pitchedapps/frost/parsers/MessageParser.kt +++ b/app/src/main/kotlin/com/pitchedapps/frost/parsers/MessageParser.kt @@ -1,6 +1,8 @@ package com.pitchedapps.frost.parsers -import com.pitchedapps.frost.facebook.formattedFbUrl +import com.pitchedapps.frost.dbflow.CookieModel +import com.pitchedapps.frost.facebook.* +import com.pitchedapps.frost.services.NotificationContent import com.pitchedapps.frost.utils.L import org.apache.commons.text.StringEscapeUtils import org.jsoup.Jsoup @@ -14,13 +16,55 @@ import org.jsoup.nodes.Element * We can parse out the content we want directly and load it ourselves * */ -object MessageParser : FrostParser<Triple<List<FrostThread>, FrostLink?, List<FrostLink>>> by MessageParserImpl() +object MessageParser : FrostParser<FrostMessages> by MessageParserImpl() -data class FrostThread(val id: Int, val img: String, val title: String, val time: Long, val url: String, val unread: Boolean, val content: String?) +data class FrostMessages(val threads: List<FrostThread>, + val seeMore: FrostLink?, + val extraLinks: List<FrostLink> +) : ParseNotification { + override fun toString() = StringBuilder().apply { + append("FrostMessages {\n") + append(threads.toJsonString("threads", 1)) + append("\tsee more: $seeMore\n") + append(extraLinks.toJsonString("extra links", 1)) + append("}") + }.toString() -data class FrostLink(val text: String, val href: String) + override fun getUnreadNotifications(data: CookieModel) = + threads.filter(FrostThread::unread).map { + with(it) { + NotificationContent( + data = data, + notifId = Math.abs(id.toInt()), + href = url, + title = title, + text = content ?: "", + timestamp = time, + profileUrl = img + ) + } + } +} + +/** + * [id] user/thread id, or current time fallback + * [img] parsed url for profile img + * [time] time of message + * [url] link to thread + * [unread] true if image is unread, false otherwise + * [content] optional string for thread + */ +data class FrostThread(val id: Long, + val img: String, + val title: String, + val time: Long, + val url: String, + val unread: Boolean, + val content: String?) -private class MessageParserImpl : FrostParserBase<Triple<List<FrostThread>, FrostLink?, List<FrostLink>>>() { +private class MessageParserImpl : FrostParserBase<FrostMessages>(true) { + + override val url = FbItem.MESSAGES.url override fun textToDoc(text: String): Document? { var content = StringEscapeUtils.unescapeEcmaScript(text) @@ -39,32 +83,29 @@ private class MessageParserImpl : FrostParserBase<Triple<List<FrostThread>, Fros return Jsoup.parseBodyFragment("<div $content") } - override fun parse(doc: Document): Triple<List<FrostThread>, FrostLink?, List<FrostLink>>? { - val threadList = doc.getElementById("threadlist_rows") + override fun parseImpl(doc: Document): FrostMessages? { + val threadList = doc.getElementById("threadlist_rows") ?: return null val threads: List<FrostThread> = threadList.getElementsByAttributeValueContaining("id", "thread_fbid_") - .mapNotNull { parseMessage(it) } + .mapNotNull(this::parseMessage) val seeMore = parseLink(doc.getElementById("see_older_threads")) val extraLinks = threadList.nextElementSibling().select("a") - .mapNotNull { parseLink(it) } - return Triple(threads, seeMore, extraLinks) + .mapNotNull(this::parseLink) + return FrostMessages(threads, seeMore, extraLinks) } private fun parseMessage(element: Element): FrostThread? { val a = element.getElementsByTag("a").first() ?: return null val abbr = element.getElementsByTag("abbr") - val epoch = FrostRegex.epoch.find(abbr.attr("data-store")) - ?.groupValues?.getOrNull(1)?.toLongOrNull() ?: -1L + val epoch = FB_EPOCH_MATCHER.find(abbr.attr("data-store"))[1]?.toLongOrNull() ?: -1L //fetch id - val id = FrostRegex.messageNotifId.find(element.id()) - ?.groupValues?.getOrNull(1)?.toLongOrNull() ?: System.currentTimeMillis() + val id = FB_MESSAGE_NOTIF_ID_MATCHER.find(element.id())[1]?.toLongOrNull() + ?: System.currentTimeMillis() val content = element.select("span.snippet").firstOrNull()?.text()?.trim() - //fetch convo pic - val p = element.select("i.img[style*=url]") - val pUrl = FrostRegex.profilePicture.find(p.attr("style"))?.groups?.get(1)?.value?.formattedFbUrl ?: "" + val img = element.getInnerImgStyle() L.v("url", a.attr("href")) return FrostThread( - id = id.toInt(), - img = pUrl.formattedFbUrl, + id = id, + img = img, title = a.text(), time = epoch, url = a.attr("href").formattedFbUrl, @@ -73,15 +114,4 @@ private class MessageParserImpl : FrostParserBase<Triple<List<FrostThread>, Fros ) } - private fun parseLink(element: Element?): FrostLink? { - val a = element?.getElementsByTag("a")?.first() ?: return null - return FrostLink(a.text(), a.attr("href")) - } - - override fun debugImpl(data: Triple<List<FrostThread>, FrostLink?, List<FrostLink>>, result: MutableList<String>) { - result.addAll(data.first.map(FrostThread::toString)) - result.add("See more link:") - result.add("\t${data.second}") - result.addAll(data.third.map(FrostLink::toString)) - } } diff --git a/app/src/main/kotlin/com/pitchedapps/frost/parsers/NotifParser.kt b/app/src/main/kotlin/com/pitchedapps/frost/parsers/NotifParser.kt new file mode 100644 index 00000000..f743a43a --- /dev/null +++ b/app/src/main/kotlin/com/pitchedapps/frost/parsers/NotifParser.kt @@ -0,0 +1,92 @@ +package com.pitchedapps.frost.parsers + +import com.pitchedapps.frost.dbflow.CookieModel +import com.pitchedapps.frost.facebook.* +import com.pitchedapps.frost.services.NotificationContent +import com.pitchedapps.frost.utils.L +import org.jsoup.nodes.Document +import org.jsoup.nodes.Element + +/** + * Created by Allan Wang on 2017-12-25. + * + */ +object NotifParser : FrostParser<FrostNotifs> by NotifParserImpl() + +data class FrostNotifs( + val notifs: List<FrostNotif>, + val seeMore: FrostLink? +) : ParseNotification { + override fun toString() = StringBuilder().apply { + append("FrostNotifs {\n") + append(notifs.toJsonString("notifs", 1)) + append("\tsee more: $seeMore\n") + append("}") + }.toString() + + override fun getUnreadNotifications(data: CookieModel) = + notifs.filter(FrostNotif::unread).map { + with(it) { + NotificationContent( + data = data, + notifId = Math.abs(id.toInt()), + href = url, + title = null, + text = content ?: "", + timestamp = time, + profileUrl = img + ) + } + } +} + +/** + * [id] notif id, or current time fallback + * [img] parsed url for profile img + * [time] time of message + * [url] link to thread + * [unread] true if image is unread, false otherwise + * [content] optional string for thread + */ +data class FrostNotif(val id: Long, + val img: String, + val time: Long, + val url: String, + val unread: Boolean, + val content: String?) + +private class NotifParserImpl : FrostParserBase<FrostNotifs>(false) { + + override val url = FbItem.NOTIFICATIONS.url + + override fun parseImpl(doc: Document): FrostNotifs? { + val notificationList = doc.getElementById("notifications_list") ?: return null + val notifications = notificationList.getElementsByAttributeValueContaining("id", "list_notif_") + .mapNotNull { parseNotif(it) } + val seeMore = parseLink(doc.getElementsByAttributeValue("href", "/notifications.php?more").first()) + return FrostNotifs(notifications, seeMore) + } + + private fun parseNotif(element: Element): FrostNotif? { + val a = element.getElementsByTag("a").first() ?: return null + val abbr = element.getElementsByTag("abbr") + val epoch = FB_EPOCH_MATCHER.find(abbr.attr("data-store"))[1]?.toLongOrNull() ?: -1L + //fetch id + val id = FB_NOTIF_ID_MATCHER.find(element.id())[1]?.toLongOrNull() + ?: System.currentTimeMillis() + val img = element.getInnerImgStyle() + val timeString = abbr.text() + val content = a.text().replace("\u00a0", " ").removeSuffix(timeString).trim() //remove + L.v("url", a.attr("href")) + return FrostNotif( + id = id, + img = img, + time = epoch, + url = a.attr("href").formattedFbUrl, + unread = !element.hasClass("acw"), + content = content + ) + } + + +} diff --git a/app/src/main/kotlin/com/pitchedapps/frost/parsers/SearchParser.kt b/app/src/main/kotlin/com/pitchedapps/frost/parsers/SearchParser.kt index 908bb153..bc09d4db 100644 --- a/app/src/main/kotlin/com/pitchedapps/frost/parsers/SearchParser.kt +++ b/app/src/main/kotlin/com/pitchedapps/frost/parsers/SearchParser.kt @@ -1,8 +1,10 @@ package com.pitchedapps.frost.parsers -import ca.allanwang.kau.utils.withMaxLength +import ca.allanwang.kau.searchview.SearchItem +import com.pitchedapps.frost.dbflow.CookieModel import com.pitchedapps.frost.facebook.FbItem import com.pitchedapps.frost.facebook.formattedFbUrl +import com.pitchedapps.frost.parsers.FrostSearch.Companion.create import com.pitchedapps.frost.utils.L import com.pitchedapps.frost.utils.frostJsoup import org.jsoup.Jsoup @@ -12,11 +14,11 @@ import org.jsoup.nodes.Element /** * Created by Allan Wang on 2017-10-09. */ -object SearchParser : FrostParser<List<FrostSearch>> by SearchParserImpl() { - fun query(input: String): List<FrostSearch>? { +object SearchParser : FrostParser<FrostSearches> by SearchParserImpl() { + fun query(cookie: String?, input: String): ParseResponse<FrostSearches>? { val url = "${FbItem._SEARCH.url}?q=${if (input.isNotBlank()) input else "a"}" L.i(null, "Search Query $url") - return parse(frostJsoup(url)) + return parse(cookie, frostJsoup(url)) } } @@ -25,25 +27,40 @@ enum class SearchKeys(val key: String) { EVENTS("keywords_events") } +data class FrostSearches(val results: List<FrostSearch>) { + + override fun toString() = StringBuilder().apply { + append("FrostSearches {\n") + append(results.toJsonString("results", 1)) + append("}") + }.toString() +} + /** * As far as I'm aware, all links are independent, so the queries don't matter * A lot of it is tracking information, which I'll strip away * Other text items are formatted for safety + * + * Note that it's best to create search results from [create] */ -class FrostSearch(href: String, title: String, description: String?) { - val href = with(href.indexOf("?")) { if (this == -1) href else href.substring(0, this) } - val title = title.format() - val description = description?.format() +data class FrostSearch(val href: String, val title: String, val description: String?) { - private fun String.format() = replace("\n", " ").withMaxLength(50) - - override fun toString(): String - = "FrostSearch(href=$href, title=$title, description=$description)" + fun toSearchItem() = SearchItem(href, title, description) + companion object { + fun create(href: String, title: String, description: String?) = FrostSearch( + with(href.indexOf("?")) { if (this == -1) href else href.substring(0, this) }, + title.format(), + description?.format() + ) + } } -private class SearchParserImpl : FrostParserBase<List<FrostSearch>>() { - override fun parse(doc: Document): List<FrostSearch>? { +private class SearchParserImpl : FrostParserBase<FrostSearches>(false) { + + override val url = "${FbItem._SEARCH.url}?q=a" + + override fun parseImpl(doc: Document): FrostSearches? { val container: Element = doc.getElementById("BrowseResultsContainer") ?: doc.getElementById("root") ?: return null @@ -51,19 +68,11 @@ private class SearchParserImpl : FrostParserBase<List<FrostSearch>>() { * * Removed [data-store*=result_id] */ - return container.select("a.touchable[href]").filter(Element::hasText).map { - FrostSearch(it.attr("href").formattedFbUrl, + return FrostSearches(container.select("a.touchable[href]").filter(Element::hasText).map { + FrostSearch.create(it.attr("href").formattedFbUrl, it.select("._uoi").first()?.text() ?: "", it.select("._1tcc").first()?.text()) - }.filter { it.title.isNotBlank() } - } - - - override fun textToDoc(text: String): Document? = Jsoup.parse(text) - - override fun debugImpl(data: List<FrostSearch>, result: MutableList<String>) { - result.add("Has size ${data.size}") - result.addAll(data.map(FrostSearch::toString)) + }.filter { it.title.isNotBlank() }) } }
\ No newline at end of file |