From 29c463c7da91d2c08b1bf8f1a24e7dea4afabb0c Mon Sep 17 00:00:00 2001 From: grizzlyuser Date: Wed, 30 Dec 2020 21:48:02 +0200 Subject: libre/iceweasel: Add JSON processing script jq was not enough. It lacks functionality necessary to do all these changes in one pass, like verification by JSON schemas, or automatic unique timestamp generation for changed records. Python script can be updated in future to support more Remote Settings dumps, not just main/search-config and monitor/changes. --- libre/iceweasel/PKGBUILD | 10 +- libre/iceweasel/libre-process-json-files.py | 174 ++++++++++++++++++++++++++++ 2 files changed, 179 insertions(+), 5 deletions(-) create mode 100644 libre/iceweasel/libre-process-json-files.py (limited to 'libre') diff --git a/libre/iceweasel/PKGBUILD b/libre/iceweasel/PKGBUILD index 84542ba6f..77fff6406 100644 --- a/libre/iceweasel/PKGBUILD +++ b/libre/iceweasel/PKGBUILD @@ -62,8 +62,7 @@ makedepends=(unzip zip diffutils yasm mesa imake inetutils xorg-server-xvfb autoconf2.13 rust clang llvm jack gtk2 nodejs cbindgen nasm python-setuptools python-psutil python-zstandard lld) # FIXME: 'mozilla-serarchplugins' package needs re-working (see note in prepare()) -makedepends+=(quilt libxslt imagemagick git jq) -makedepends+=(rust=1:1.46.0) # FIXME: FTBS with rust v1.47.0 +makedepends+=(quilt libxslt imagemagick git jq python-jsonschema) optdepends=('networkmanager: Location detection via available WiFi networks' 'libnotify: Notification integration' 'pulseaudio: Audio support' @@ -76,8 +75,8 @@ source=(https://archive.mozilla.org/pub/firefox/releases/$pkgver/source/firefox- $pkgname.desktop) source+=(https://repo.parabola.nu/other/iceweasel/${pkgname}_${_brandingver}-${_brandingrel}.branding.tar.xz{,.sig} libre.patch - libre-searchengines.patch libre-0001-always-sync-remote-settings-with-local-dump.patch + libre-process-json-files.py vendor.js.in) source_armv7h=(arm.patch build-arm-libopus.patch) @@ -338,7 +337,6 @@ END ## libre patching ## - # Remove remaining non-free bits # Remove test-related networking dumps, because they contain code from # some Amazon webpage with no clear licensing, thus nonfree. # Also they interfere with checking of Remote Settings patching done later, @@ -374,7 +372,9 @@ END -e '!third_party/python/**/*.egg-info/' rm -rf .git - # Patch and remove anything that's left + python ../libre-process-json-files.py "$srcdir/firefox-$pkgver" "${brandingsrcdir}" + + # Remove remaining non-free bits echo "applying libre.patch" patch -Np1 --no-backup-if-mismatch -i "${srcdir}"/libre.patch } diff --git a/libre/iceweasel/libre-process-json-files.py b/libre/iceweasel/libre-process-json-files.py new file mode 100644 index 000000000..2fdde62d4 --- /dev/null +++ b/libre/iceweasel/libre-process-json-files.py @@ -0,0 +1,174 @@ +#! /usr/bin/python3 + +# Copyright (C) 2020 grizzlyuser +# Based on: https://gitlab.trisquel.org/trisquel/wrapage-helpers/-/blob/81881d89b2bf7d502dd14fcccdb471fec6f6b206/helpers/DATA/firefox/reprocess-search-config.py +# Below is the notice from the original author: +# +# Copyright (C) 2020 Ruben Rodriguez +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +import json +import sys +import time +import copy +import argparse +import pathlib +from collections import namedtuple +from jsonschema import validate + +parser = argparse.ArgumentParser() +parser.add_argument( + 'MAIN_PATH', + type=pathlib.Path, + help='path to main application source code directory') +parser.add_argument( + 'BRANDING_PATH', + type=pathlib.Path, + help='path to branding source code directory') +parser.add_argument( + '-i', + '--indent', + type=int, + help='indent for pretty printing of output files') +arguments = parser.parse_args() + +File = namedtuple('File', ['path', 'content']) + + +class RemoteSettings: + DUMPS_PATH = arguments.MAIN_PATH / 'services/settings/dumps' + JSON_PATHS = tuple(DUMPS_PATH.glob('*/*.json')) + WRAPPER_NAME = 'data' + + @classmethod + def wrap(cls, processed): + return File(processed.path, {cls.WRAPPER_NAME: processed.content}) + + @classmethod + def unwrap(cls, parsed_jsons): + return [File(json.path, json.content[cls.WRAPPER_NAME]) + for json in parsed_jsons] + + @classmethod + def process_raw(cls, unwrapped_jsons): + changes = [] + output_path = cls.DUMPS_PATH / 'monitor/changes.json' + + for collection in unwrapped_jsons: + if collection.path == cls.DUMPS_PATH / 'main/example.json': + continue + latest_change = {} + latest_change['last_modified'] = max( + (record['last_modified'] for record in collection.content), default=0) + latest_change['bucket'] = collection.path.parent.name + latest_change['collection'] = collection.path.stem + changes.append(latest_change) + + output_path.parent.mkdir(exist_ok=True) + + return File(output_path, changes) + + @classmethod + def process(cls, parsed_jsons): + return cls.wrap(cls.process_raw(cls.unwrap(parsed_jsons))) + + +class SearchConfig(RemoteSettings): + JSON_PATHS = (RemoteSettings.DUMPS_PATH / 'main/search-config.json',) + + def _get_schema(): + PATH = arguments.MAIN_PATH / \ + 'toolkit/components/search/schema/search-engine-config-schema.json' + with PATH.open() as file: + return json.load(file) + + @classmethod + def process_raw(cls, unwrapped_jsons): + _WHITELIST = ('ddg@search.mozilla.org', 'wikipedia@search.mozilla.org') + SCHEMA = cls._get_schema() + + search_engines, timestamps = [], [] + search_config = unwrapped_jsons[0] + + for search_engine in search_config.content: + if search_engine['webExtension']['id'] in _WHITELIST: + clone = copy.deepcopy(search_engine) + + if 'telemetryId' in search_engine: + del search_engine['telemetryId'] + if 'extraParams' in search_engine: + del search_engine['extraParams'] + + general_specifier = {} + for specifier in search_engine['appliesTo'].copy(): + if 'application' in specifier: + if 'distributions' in specifier['application']: + search_engine['appliesTo'].remove(specifier) + continue + if 'extraParams' in specifier['application']: + del specifier['application']['extraParams'] + + if 'included' in specifier and 'everywhere' in specifier[ + 'included'] and specifier['included']['everywhere']: + general_specifier = specifier + + if not general_specifier: + general_specifier = {'included': {'everywhere': True}} + search_engine['appliesTo'].insert(0, general_specifier) + if search_engine['webExtension']['id'] == _WHITELIST[0]: + general_specifier['default'] = 'yes' + + if clone != search_engine: + timestamp = int(round(time.time_ns() / 10 ** 6)) + while timestamp in timestamps: + timestamp += 1 + timestamps.append(timestamp) + search_engine['last_modified'] = timestamp + + validate(search_engine, schema=SCHEMA) + + search_engines.append(search_engine) + + return File(search_config.path, search_engines) + + +class TopSites: + JSON_PATHS = ( + arguments.MAIN_PATH / + 'browser/components/newtab/data/content/tippytop/top_sites.json', + arguments.BRANDING_PATH / + 'tippytop/top_sites.json') + + @classmethod + def process(cls, parsed_jsons): + main_top_sites = parsed_jsons[0] + branding_top_sites = parsed_jsons[1] + result = branding_top_sites.content + \ + [site for site in main_top_sites.content if site['title'] == 'wikipedia'] + return File(main_top_sites.path, result) + + +processors = (SearchConfig, TopSites, RemoteSettings) + +for processor in processors: + parsed_jsons = [] + for json_path in processor.JSON_PATHS: + with json_path.open() as file: + parsed_jsons.append(File(json_path, json.load(file))) + + processed = processor.process(parsed_jsons) + with processed.path.open('w') as file: + json.dump(processed.content, file, indent=arguments.indent) -- cgit v1.2.3