#! /usr/bin/python3

# Copyright (C) 2020 grizzlyuser
# Based on: https://gitlab.trisquel.org/trisquel/wrapage-helpers/-/blob/81881d89b2bf7d502dd14fcccdb471fec6f6b206/helpers/DATA/firefox/reprocess-search-config.py
# Below is the notice from the original author:
#
# Copyright (C) 2020 Ruben Rodriguez
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

# Provenance: this file was recovered from the patch
#   From 550d820a3bb3821acf9b5bde3aaf6fcc017fe51d
#   From: bill-auger
#   Date: Mon, 11 Jan 2021 02:05:56 -0500
#   Subject: [iceweasel]: housekeeping
# which created libre/iceweasel/process-json-files.py.

"""Rewrite JSON data files inside an application source tree.

Usage: process-json-files.py [-i INDENT] MAIN_PATH BRANDING_PATH

Three processors run in order (see ``processors`` at the bottom):

* ``SearchConfig`` filters ``main/search-config.json`` down to a whitelist
  of search engines and strips some per-engine fields.
* ``TopSites`` rebuilds the main ``top_sites.json`` from the branding copy
  plus the main file's ``wikipedia`` entries.
* ``RemoteSettings`` writes ``monitor/changes.json`` listing the newest
  ``last_modified`` value of each dumped collection.

NOTE: command-line arguments are parsed at import time because the class
attributes below are built from them; this module is a script, not a
library.
"""

import argparse
import copy
import json
import pathlib
import sys
import time
from collections import namedtuple

from jsonschema import validate

parser = argparse.ArgumentParser()
parser.add_argument(
    'MAIN_PATH',
    type=pathlib.Path,
    help='path to main application source code directory')
parser.add_argument(
    'BRANDING_PATH',
    type=pathlib.Path,
    help='path to branding source code directory')
parser.add_argument(
    '-i',
    '--indent',
    type=int,
    help='indent for pretty printing of output files')
arguments = parser.parse_args()

# A JSON document paired with the path it was read from / will be written to.
File = namedtuple('File', ['path', 'content'])


class RemoteSettings:
    """Regenerate ``monitor/changes.json`` from the settings dumps.

    Every dump is expected to be a JSON object whose records live under the
    ``WRAPPER_NAME`` key; subclasses reuse ``wrap``/``unwrap``/``process``
    and override ``JSON_PATHS`` and ``process_raw``.
    """

    DUMPS_PATH = arguments.MAIN_PATH / 'services/settings/dumps'
    # Sorted so that the order of entries in the output does not depend on
    # the directory iteration order of the filesystem.
    JSON_PATHS = tuple(sorted(DUMPS_PATH.glob('*/*.json')))
    WRAPPER_NAME = 'data'

    @classmethod
    def wrap(cls, processed):
        """Return ``processed`` with its content nested under WRAPPER_NAME."""
        return File(processed.path, {cls.WRAPPER_NAME: processed.content})

    @classmethod
    def unwrap(cls, parsed_jsons):
        """Return the files with the WRAPPER_NAME envelope removed.

        Raises KeyError if a document has no WRAPPER_NAME key.
        """
        return [File(parsed.path, parsed.content[cls.WRAPPER_NAME])
                for parsed in parsed_jsons]

    @classmethod
    def process_raw(cls, unwrapped_jsons):
        """Build the list of per-collection change records.

        Each record carries the largest ``last_modified`` found in the
        collection (0 when the collection is empty), the bucket name (the
        parent directory name) and the collection name (the file stem).

        Side effect: creates the ``monitor`` directory if it is missing.
        """
        output_path = cls.DUMPS_PATH / 'monitor/changes.json'
        skipped = {
            cls.DUMPS_PATH / 'main/example.json',
            # The glob also matches the file written here by a previous
            # run; listing it would make the output describe itself and
            # differ between the first and later runs.
            output_path,
        }

        changes = [
            {
                'last_modified': max(
                    (record['last_modified']
                     for record in collection.content),
                    default=0),
                'bucket': collection.path.parent.name,
                'collection': collection.path.stem,
            }
            for collection in unwrapped_jsons
            if collection.path not in skipped
        ]

        output_path.parent.mkdir(exist_ok=True)

        return File(output_path, changes)

    @classmethod
    def process(cls, parsed_jsons):
        """Unwrap the inputs, run ``process_raw`` and re-wrap the result."""
        return cls.wrap(cls.process_raw(cls.unwrap(parsed_jsons)))


class SearchConfig(RemoteSettings):
    """Reduce ``main/search-config.json`` to the whitelisted engines."""

    JSON_PATHS = (RemoteSettings.DUMPS_PATH / 'main/search-config.json',)

    @staticmethod
    def _get_schema():
        """Load the JSON schema each search-engine record is checked against."""
        schema_path = arguments.MAIN_PATH / \
            'toolkit/components/search/schema/search-engine-config-schema.json'
        with schema_path.open(encoding='utf-8') as file:
            return json.load(file)

    @classmethod
    def process_raw(cls, unwrapped_jsons):
        """Filter and clean the search-engine records.

        Only engines whose ``webExtension.id`` is whitelisted are kept.
        For each kept engine: ``telemetryId`` and ``extraParams`` are
        dropped, ``appliesTo`` entries that name distributions are removed,
        an "included everywhere" entry is guaranteed to exist, and the
        first whitelisted engine is marked as the default there.  Records
        that were changed get a fresh, unique ``last_modified`` value.

        Raises jsonschema's validation error if a resulting record does
        not match the schema.
        """
        _WHITELIST = ('ddg@search.mozilla.org', 'wikipedia@search.mozilla.org')
        SCHEMA = cls._get_schema()

        search_engines = []
        timestamps = set()
        search_config = unwrapped_jsons[0]

        for search_engine in search_config.content:
            engine_id = search_engine['webExtension']['id']
            if engine_id not in _WHITELIST:
                continue

            # Snapshot taken before any edit, to detect whether the record
            # was actually modified.
            clone = copy.deepcopy(search_engine)

            search_engine.pop('telemetryId', None)
            search_engine.pop('extraParams', None)

            general_specifier = {}
            # Iterate over a copy because entries are removed in the loop.
            for specifier in search_engine['appliesTo'].copy():
                if 'application' in specifier:
                    if 'distributions' in specifier['application']:
                        search_engine['appliesTo'].remove(specifier)
                        continue
                    specifier['application'].pop('extraParams', None)

                if specifier.get('included', {}).get('everywhere'):
                    general_specifier = specifier

            if not general_specifier:
                general_specifier = {'included': {'everywhere': True}}
                search_engine['appliesTo'].insert(0, general_specifier)
            if engine_id == _WHITELIST[0]:
                general_specifier['default'] = 'yes'

            if clone != search_engine:
                # Milliseconds since the epoch, bumped until it is unique
                # among the timestamps handed out in this run.
                timestamp = int(round(time.time_ns() / 10 ** 6))
                while timestamp in timestamps:
                    timestamp += 1
                timestamps.add(timestamp)
                search_engine['last_modified'] = timestamp

            validate(search_engine, schema=SCHEMA)

            search_engines.append(search_engine)

        return File(search_config.path, search_engines)


class TopSites:
    """Merge the branding top-sites list into the main one."""

    JSON_PATHS = (
        arguments.MAIN_PATH /
        'browser/components/newtab/data/content/tippytop/top_sites.json',
        arguments.BRANDING_PATH /
        'tippytop/top_sites.json')

    @classmethod
    def process(cls, parsed_jsons):
        """Return the branding sites followed by the main 'wikipedia' sites.

        ``parsed_jsons`` holds the main file first and the branding file
        second (the order of ``JSON_PATHS``); the result is written back to
        the main file's path.
        """
        main_top_sites, branding_top_sites = parsed_jsons[0], parsed_jsons[1]
        # .get() so that an entry without a title is skipped, not fatal.
        kept_main_sites = [site for site in main_top_sites.content
                           if site.get('title') == 'wikipedia']
        return File(main_top_sites.path,
                    branding_top_sites.content + kept_main_sites)


# Order matters: RemoteSettings runs last so that the change list it writes
# is computed from the search config already rewritten by SearchConfig.
processors = (SearchConfig, TopSites, RemoteSettings)

for processor in processors:
    parsed_jsons = []
    for json_path in processor.JSON_PATHS:
        # JSON text is UTF-8; do not depend on the locale's default encoding.
        with json_path.open(encoding='utf-8') as file:
            parsed_jsons.append(File(json_path, json.load(file)))

    processed = processor.process(parsed_jsons)
    with processed.path.open('w', encoding='utf-8') as file:
        json.dump(processed.content, file, indent=arguments.indent)