From 9f45f935ac5c57381ee9580d84f40189c2ba8994 Mon Sep 17 00:00:00 2001 From: grizzlyuser Date: Sun, 4 Apr 2021 23:16:26 +0300 Subject: libre/iceweasel: JSON processing script updates Refactor and add processing of the top-sites.json Remote Settings dump, which is needed to override the default sites with nonfree content that are displayed on the new tab page and in the address bar dropdown. --- libre/iceweasel/PKGBUILD | 2 +- libre/iceweasel/process-json-files.py | 207 ++++++++++++++++++++++------------ 2 files changed, 133 insertions(+), 76 deletions(-) diff --git a/libre/iceweasel/PKGBUILD b/libre/iceweasel/PKGBUILD index 191ebdb88..1927046a3 100644 --- a/libre/iceweasel/PKGBUILD +++ b/libre/iceweasel/PKGBUILD @@ -93,7 +93,7 @@ sha256sums+=('79c957263eb8eb99cdb5cbadf689883b04d4ad197ab956a1dbd1576bfbdce9ae' 'SKIP' '46171d428125c524c03588f1c5fbb7778e68ef558b5faa54be84f4df5e546427' 'a6afaac11aa0b8ff6d32470681df54a2acfe8f8b62afe0c19dc168b1424ba098' - '6c5b6dda26503a7d4e00b81249225cc5ba03c3bde8b14bb65cb47648e372efee' + 'c38c5f5937bcedcd5645d63bf5787e0336d0b006b29f64c45414bc3a6e83c3be' '714998c5fc379f54a66ff80a845b7880169cd5b4094b77b719a99d33b65c0940') validpgpkeys=('14F26682D0916CDD81E37B6D61B7B526D98F0353') # Mozilla Software Releases validpgpkeys+=('BFA8008A8265677063B11BF47171986E4B745536') # Andreas Grapentin diff --git a/libre/iceweasel/process-json-files.py b/libre/iceweasel/process-json-files.py index a972e90e2..69264dc94 100644 --- a/libre/iceweasel/process-json-files.py +++ b/libre/iceweasel/process-json-files.py @@ -1,6 +1,6 @@ #! 
/usr/bin/python3 -# Copyright (C) 2020 grizzlyuser +# Copyright (C) 2020, 2021 grizzlyuser # Based on: https://gitlab.trisquel.org/trisquel/wrapage-helpers/-/blob/81881d89b2bf7d502dd14fcccdb471fec6f6b206/helpers/DATA/firefox/reprocess-search-config.py # Below is the notice from the original author: # @@ -42,6 +42,7 @@ parser.add_argument( '-i', '--indent', type=int, + default=2, help='indent for pretty printing of output files') arguments = parser.parse_args() @@ -49,103 +50,127 @@ File = namedtuple('File', ['path', 'content']) class RemoteSettings: - DUMPS_PATH = arguments.MAIN_PATH / 'services/settings/dumps' - JSON_PATHS = tuple(DUMPS_PATH.glob('*/*.json')) - WRAPPER_NAME = 'data' + DUMPS_PATH_RELATIVE = 'services/settings/dumps' + DUMPS_PATH_ABSOLUTE = arguments.MAIN_PATH / DUMPS_PATH_RELATIVE + + _WRAPPER_NAME = 'data' @classmethod def wrap(cls, processed): - return File(processed.path, {cls.WRAPPER_NAME: processed.content}) + return File(processed.path, {cls._WRAPPER_NAME: processed.content}) @classmethod def unwrap(cls, parsed_jsons): - return [File(json.path, json.content[cls.WRAPPER_NAME]) + return [File(json.path, json.content[cls._WRAPPER_NAME]) for json in parsed_jsons] @classmethod - def process_raw(cls, unwrapped_jsons): - changes = [] - output_path = cls.DUMPS_PATH / 'monitor/changes.json' + def should_modify_collection(cls, collection): + return True + @classmethod + def process_raw(cls, unwrapped_jsons, parsed_schema): + timestamps, result = [], [] for collection in unwrapped_jsons: - if collection.path == cls.DUMPS_PATH / 'main/example.json': - continue - latest_change = {} - latest_change['last_modified'] = max( - (record['last_modified'] for record in collection.content), default=0) - latest_change['bucket'] = collection.path.parent.name - latest_change['collection'] = collection.path.stem - changes.append(latest_change) + should_modify_collection = cls.should_modify_collection(collection) + for record in collection.content: + if 
should_modify_collection: + if cls.should_drop_record(record): + continue - output_path.parent.mkdir(exist_ok=True) + clone = copy.deepcopy(record) - return File(output_path, changes) + record = cls.process_record(record) - @classmethod - def process(cls, parsed_jsons): - return cls.wrap(cls.process_raw(cls.unwrap(parsed_jsons))) + if clone != record: + timestamp = int(round(time.time_ns() / 10 ** 6)) + while timestamp in timestamps: + timestamp += 1 + timestamps.append(timestamp) + record['last_modified'] = timestamp + if parsed_schema is not None: + validate(record, schema=parsed_schema) -class SearchConfig(RemoteSettings): - JSON_PATHS = (RemoteSettings.DUMPS_PATH / 'main/search-config.json',) + result.append(record) + + cls.OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True) - def _get_schema(): - PATH = arguments.MAIN_PATH / \ - 'toolkit/components/search/schema/search-engine-config-schema.json' - with PATH.open() as file: - return json.load(file) + return File(cls.OUTPUT_PATH, result) @classmethod - def process_raw(cls, unwrapped_jsons): - _WHITELIST = ('ddg@search.mozilla.org', 'wikipedia@search.mozilla.org') - SCHEMA = cls._get_schema() + def process(cls, parsed_jsons, parsed_schema): + return cls.wrap( + cls.process_raw( + cls.unwrap(parsed_jsons), + parsed_schema)) - search_engines, timestamps = [], [] - search_config = unwrapped_jsons[0] - for search_engine in search_config.content: - if search_engine['webExtension']['id'] in _WHITELIST: - clone = copy.deepcopy(search_engine) +class Changes(RemoteSettings): + JSON_PATHS = tuple(RemoteSettings.DUMPS_PATH_ABSOLUTE.glob('*/*.json')) + OUTPUT_PATH = RemoteSettings.DUMPS_PATH_ABSOLUTE / 'monitor/changes.json' - if 'telemetryId' in search_engine: - del search_engine['telemetryId'] - if 'extraParams' in search_engine: - del search_engine['extraParams'] + @classmethod + def process_raw(cls, unwrapped_jsons, parsed_schema): + changes = [] + + for collection in unwrapped_jsons: + if collection.path != 
RemoteSettings.DUMPS_PATH_ABSOLUTE / 'main/example.json': + latest_change = {} + latest_change['last_modified'] = max( + (record['last_modified'] for record in collection.content), default=0) + latest_change['bucket'] = collection.path.parent.name + latest_change['collection'] = collection.path.stem + changes.append(latest_change) + + cls.OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True) + + return File(cls.OUTPUT_PATH, changes) + + +class SearchConfig(RemoteSettings): + JSON_PATHS = ( + RemoteSettings.DUMPS_PATH_ABSOLUTE / + 'main/search-config.json', + ) + SCHEMA_PATH = arguments.MAIN_PATH / \ + 'toolkit/components/search/schema/search-engine-config-schema.json' + OUTPUT_PATH = JSON_PATHS[0] - general_specifier = {} - for specifier in search_engine['appliesTo'].copy(): - if 'application' in specifier: - if 'distributions' in specifier['application']: - search_engine['appliesTo'].remove(specifier) - continue - if 'extraParams' in specifier['application']: - del specifier['application']['extraParams'] + _DUCKDUCKGO_SEARCH_ENGINE_ID = 'ddg@search.mozilla.org' - if 'included' in specifier and 'everywhere' in specifier[ - 'included'] and specifier['included']['everywhere']: - general_specifier = specifier + @classmethod + def should_drop_record(cls, search_engine): + return search_engine['webExtension']['id'] not in ( + cls._DUCKDUCKGO_SEARCH_ENGINE_ID, 'wikipedia@search.mozilla.org') - if not general_specifier: - general_specifier = {'included': {'everywhere': True}} - search_engine['appliesTo'].insert(0, general_specifier) - if search_engine['webExtension']['id'] == _WHITELIST[0]: - general_specifier['default'] = 'yes' + @classmethod + def process_record(cls, search_engine): + [search_engine.pop(key, None) + for key in ['extraParams', 'telemetryId']] - if clone != search_engine: - timestamp = int(round(time.time_ns() / 10 ** 6)) - while timestamp in timestamps: - timestamp += 1 - timestamps.append(timestamp) - search_engine['last_modified'] = timestamp + 
general_specifier = {} + for specifier in search_engine['appliesTo'].copy(): + if 'application' in specifier: + if 'distributions' in specifier['application']: + search_engine['appliesTo'].remove(specifier) + continue + specifier['application'].pop('extraParams', None) - validate(search_engine, schema=SCHEMA) + if 'included' in specifier and 'everywhere' in specifier[ + 'included'] and specifier['included']['everywhere']: + general_specifier = specifier - search_engines.append(search_engine) + if not general_specifier: + general_specifier = {'included': {'everywhere': True}} + search_engine['appliesTo'].insert(0, general_specifier) + if search_engine['webExtension']['id'] == cls._DUCKDUCKGO_SEARCH_ENGINE_ID: + general_specifier['default'] = 'yes' - return File(search_config.path, search_engines) + return search_engine -class TopSites: +class TippyTopSites: JSON_PATHS = ( arguments.MAIN_PATH / 'browser/components/newtab/data/content/tippytop/top_sites.json', @@ -153,15 +178,42 @@ class TopSites: 'tippytop/top_sites.json') @classmethod - def process(cls, parsed_jsons): - main_top_sites = parsed_jsons[0] - branding_top_sites = parsed_jsons[1] - result = branding_top_sites.content + \ - [site for site in main_top_sites.content if 'wikipedia.org' in site['domains']] - return File(main_top_sites.path, result) + def process(cls, parsed_jsons, parsed_schema): + tippy_top_sites_main = parsed_jsons[0] + tippy_top_sites_branding = parsed_jsons[1] + result = tippy_top_sites_branding.content + \ + [site for site in tippy_top_sites_main.content if 'wikipedia.org' in site['domains']] + return File(tippy_top_sites_main.path, result) + +class TopSites(RemoteSettings): + _TOP_SITES_JSON_PATH = 'main/top-sites.json' + _TOP_SITES_PATH_MAIN = RemoteSettings.DUMPS_PATH_ABSOLUTE / _TOP_SITES_JSON_PATH -processors = (SearchConfig, TopSites, RemoteSettings) + JSON_PATHS = ( + arguments.BRANDING_PATH / + RemoteSettings.DUMPS_PATH_RELATIVE / + _TOP_SITES_JSON_PATH, + _TOP_SITES_PATH_MAIN) + 
OUTPUT_PATH = _TOP_SITES_PATH_MAIN + + @classmethod + def should_modify_collection(cls, collection): + return cls._TOP_SITES_PATH_MAIN == collection.path + + @classmethod + def should_drop_record(cls, site): + return site['url'] != 'https://www.wikipedia.org/' + + @classmethod + def process_record(cls, site): + site.pop('exclude_regions', None) + return site + + +# To reflect the latest timestamps, Changes class should always come after +# all other RemoteSettings subclasses +processors = (TippyTopSites, SearchConfig, TopSites, Changes) for processor in processors: parsed_jsons = [] @@ -169,6 +221,11 @@ for processor in processors: with json_path.open() as file: parsed_jsons.append(File(json_path, json.load(file))) - processed = processor.process(parsed_jsons) + parsed_schema = None + if hasattr(processor, "SCHEMA_PATH"): + with processor.SCHEMA_PATH.open() as file: + parsed_schema = json.load(file) + + processed = processor.process(parsed_jsons, parsed_schema) with processed.path.open('w') as file: json.dump(processed.content, file, indent=arguments.indent) -- cgit v1.2.3