#!/usr/bin/python3

# Copyright (C) 2020 grizzlyuser
# Based on: https://gitlab.trisquel.org/trisquel/wrapage-helpers/-/blob/81881d89b2bf7d502dd14fcccdb471fec6f6b206/helpers/DATA/firefox/reprocess-search-config.py
# Below is the notice from the original author:
#
# Copyright (C) 2020 Ruben Rodriguez
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

import json
import sys
import time
import copy
import argparse
import pathlib
from collections import namedtuple
from jsonschema import validate

parser = argparse.ArgumentParser()
parser.add_argument(
    'MAIN_PATH',
    type=pathlib.Path,
    help='path to main application source code directory')
parser.add_argument(
    'BRANDING_PATH',
    type=pathlib.Path,
    help='path to branding source code directory')
parser.add_argument(
    '-i',
    '--indent',
    type=int,
    help='indent for pretty printing of output files')
arguments = parser.parse_args()

# Pairs a file path with its parsed JSON content.
File = namedtuple('File', ['path', 'content'])


# Rebuilds monitor/changes.json from the Remote Settings dumps, recording the
# latest last_modified timestamp of every collection.
class RemoteSettings:
    DUMPS_PATH = arguments.MAIN_PATH / 'services/settings/dumps'
    JSON_PATHS = tuple(DUMPS_PATH.glob('*/*.json'))
    WRAPPER_NAME = 'data'

    @classmethod
    def wrap(cls, processed):
        return File(processed.path, {cls.WRAPPER_NAME: processed.content})

    @classmethod
    def unwrap(cls, parsed_jsons):
        return [File(json.path, json.content[cls.WRAPPER_NAME])
                for json in parsed_jsons]

    @classmethod
    def process_raw(cls, unwrapped_jsons):
        changes = []
        output_path = cls.DUMPS_PATH / 'monitor/changes.json'

        for collection in unwrapped_jsons:
            # The example collection is not a real one; skip it.
            if collection.path == cls.DUMPS_PATH / 'main/example.json':
                continue
            latest_change = {}
            latest_change['last_modified'] = max(
                (record['last_modified'] for record in collection.content),
                default=0)
            latest_change['bucket'] = collection.path.parent.name
            latest_change['collection'] = collection.path.stem
            changes.append(latest_change)

        output_path.parent.mkdir(exist_ok=True)
        return File(output_path, changes)

    @classmethod
    def process(cls, parsed_jsons):
        return cls.wrap(cls.process_raw(cls.unwrap(parsed_jsons)))


# Strips search-config.json down to the whitelisted engines, removes
# telemetry and distribution-specific data, and validates the result
# against the search engine configuration schema.
class SearchConfig(RemoteSettings):
    JSON_PATHS = (RemoteSettings.DUMPS_PATH / 'main/search-config.json',)

    def _get_schema():
        PATH = arguments.MAIN_PATH / \
            'toolkit/components/search/schema/search-engine-config-schema.json'
        with PATH.open() as file:
            return json.load(file)

    @classmethod
    def process_raw(cls, unwrapped_jsons):
        _WHITELIST = ('ddg@search.mozilla.org', 'wikipedia@search.mozilla.org')
        SCHEMA = cls._get_schema()

        search_engines, timestamps = [], []
        search_config = unwrapped_jsons[0]

        for search_engine in search_config.content:
            if search_engine['webExtension']['id'] in _WHITELIST:
                # Keep a pristine copy to detect whether the engine changed.
                clone = copy.deepcopy(search_engine)

                if 'telemetryId' in search_engine:
                    del search_engine['telemetryId']
                if 'extraParams' in search_engine:
                    del search_engine['extraParams']

                general_specifier = {}
                for specifier in search_engine['appliesTo'].copy():
                    if 'application' in specifier:
                        # Drop distribution-specific entries entirely.
                        if 'distributions' in specifier['application']:
                            search_engine['appliesTo'].remove(specifier)
                            continue
                        if 'extraParams' in specifier['application']:
                            del specifier['application']['extraParams']

                    if ('included' in specifier
                            and 'everywhere' in specifier['included']
                            and specifier['included']['everywhere']):
                        general_specifier = specifier

                # Make sure there is a specifier that applies everywhere, and
                # mark the first whitelisted engine as the default.
                if not general_specifier:
                    general_specifier = {'included': {'everywhere': True}}
                    search_engine['appliesTo'].insert(0, general_specifier)
                if search_engine['webExtension']['id'] == _WHITELIST[0]:
                    general_specifier['default'] = 'yes'

                # Bump last_modified with a unique millisecond timestamp
                # whenever the engine was actually modified.
                if clone != search_engine:
                    timestamp = int(round(time.time_ns() / 10 ** 6))
                    while timestamp in timestamps:
                        timestamp += 1
                    timestamps.append(timestamp)
                    search_engine['last_modified'] = timestamp

                validate(search_engine, schema=SCHEMA)
                search_engines.append(search_engine)

        return File(search_config.path, search_engines)


# Merges the branding top sites with the Wikipedia entry from the main list.
class TopSites:
    JSON_PATHS = (
        arguments.MAIN_PATH /
        'browser/components/newtab/data/content/tippytop/top_sites.json',
        arguments.BRANDING_PATH / 'tippytop/top_sites.json')

    @classmethod
    def process(cls, parsed_jsons):
        main_top_sites = parsed_jsons[0]
        branding_top_sites = parsed_jsons[1]
        result = branding_top_sites.content + \
            [site for site in main_top_sites.content
             if 'wikipedia.org' in site['domains']]
        return File(main_top_sites.path, result)


# RemoteSettings runs last so that changes.json is rebuilt from the already
# rewritten dumps.
processors = (SearchConfig, TopSites, RemoteSettings)

for processor in processors:
    parsed_jsons = []

    for json_path in processor.JSON_PATHS:
        with json_path.open() as file:
            parsed_jsons.append(File(json_path, json.load(file)))

    processed = processor.process(parsed_jsons)
    with processed.path.open('w') as file:
        json.dump(processed.content, file, indent=arguments.indent)