summary | refs | log | tree | commit | diff
path: root/libre/iceweasel/process-json-files.py
diff options
context:
space:
mode:
Diffstat (limited to 'libre/iceweasel/process-json-files.py')
-rw-r--r-- libre/iceweasel/process-json-files.py 207
1 files changed, 132 insertions, 75 deletions
diff --git a/libre/iceweasel/process-json-files.py b/libre/iceweasel/process-json-files.py
index a972e90e2..69264dc94 100644
--- a/libre/iceweasel/process-json-files.py
+++ b/libre/iceweasel/process-json-files.py
@@ -1,6 +1,6 @@
#! /usr/bin/python3
-# Copyright (C) 2020 grizzlyuser <grizzlyuser@protonmail.com>
+# Copyright (C) 2020, 2021 grizzlyuser <grizzlyuser@protonmail.com>
# Based on: https://gitlab.trisquel.org/trisquel/wrapage-helpers/-/blob/81881d89b2bf7d502dd14fcccdb471fec6f6b206/helpers/DATA/firefox/reprocess-search-config.py
# Below is the notice from the original author:
#
@@ -42,6 +42,7 @@ parser.add_argument(
'-i',
'--indent',
type=int,
+ default=2,
help='indent for pretty printing of output files')
arguments = parser.parse_args()
@@ -49,103 +50,127 @@ File = namedtuple('File', ['path', 'content'])
class RemoteSettings:
- DUMPS_PATH = arguments.MAIN_PATH / 'services/settings/dumps'
- JSON_PATHS = tuple(DUMPS_PATH.glob('*/*.json'))
- WRAPPER_NAME = 'data'
+ DUMPS_PATH_RELATIVE = 'services/settings/dumps'
+ DUMPS_PATH_ABSOLUTE = arguments.MAIN_PATH / DUMPS_PATH_RELATIVE
+
+ _WRAPPER_NAME = 'data'
@classmethod
def wrap(cls, processed):
- return File(processed.path, {cls.WRAPPER_NAME: processed.content})
+ return File(processed.path, {cls._WRAPPER_NAME: processed.content})
@classmethod
def unwrap(cls, parsed_jsons):
- return [File(json.path, json.content[cls.WRAPPER_NAME])
+ return [File(json.path, json.content[cls._WRAPPER_NAME])
for json in parsed_jsons]
@classmethod
- def process_raw(cls, unwrapped_jsons):
- changes = []
- output_path = cls.DUMPS_PATH / 'monitor/changes.json'
+ def should_modify_collection(cls, collection):
+ return True
+ @classmethod
+ def process_raw(cls, unwrapped_jsons, parsed_schema):
+ timestamps, result = [], []
for collection in unwrapped_jsons:
- if collection.path == cls.DUMPS_PATH / 'main/example.json':
- continue
- latest_change = {}
- latest_change['last_modified'] = max(
- (record['last_modified'] for record in collection.content), default=0)
- latest_change['bucket'] = collection.path.parent.name
- latest_change['collection'] = collection.path.stem
- changes.append(latest_change)
+ should_modify_collection = cls.should_modify_collection(collection)
+ for record in collection.content:
+ if should_modify_collection:
+ if cls.should_drop_record(record):
+ continue
- output_path.parent.mkdir(exist_ok=True)
+ clone = copy.deepcopy(record)
- return File(output_path, changes)
+ record = cls.process_record(record)
- @classmethod
- def process(cls, parsed_jsons):
- return cls.wrap(cls.process_raw(cls.unwrap(parsed_jsons)))
+ if clone != record:
+ timestamp = int(round(time.time_ns() / 10 ** 6))
+ while timestamp in timestamps:
+ timestamp += 1
+ timestamps.append(timestamp)
+ record['last_modified'] = timestamp
+ if parsed_schema is not None:
+ validate(record, schema=parsed_schema)
-class SearchConfig(RemoteSettings):
- JSON_PATHS = (RemoteSettings.DUMPS_PATH / 'main/search-config.json',)
+ result.append(record)
+
+ cls.OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
- def _get_schema():
- PATH = arguments.MAIN_PATH / \
- 'toolkit/components/search/schema/search-engine-config-schema.json'
- with PATH.open() as file:
- return json.load(file)
+ return File(cls.OUTPUT_PATH, result)
@classmethod
- def process_raw(cls, unwrapped_jsons):
- _WHITELIST = ('ddg@search.mozilla.org', 'wikipedia@search.mozilla.org')
- SCHEMA = cls._get_schema()
+ def process(cls, parsed_jsons, parsed_schema):
+ return cls.wrap(
+ cls.process_raw(
+ cls.unwrap(parsed_jsons),
+ parsed_schema))
- search_engines, timestamps = [], []
- search_config = unwrapped_jsons[0]
- for search_engine in search_config.content:
- if search_engine['webExtension']['id'] in _WHITELIST:
- clone = copy.deepcopy(search_engine)
+class Changes(RemoteSettings):
+ JSON_PATHS = tuple(RemoteSettings.DUMPS_PATH_ABSOLUTE.glob('*/*.json'))
+ OUTPUT_PATH = RemoteSettings.DUMPS_PATH_ABSOLUTE / 'monitor/changes.json'
- if 'telemetryId' in search_engine:
- del search_engine['telemetryId']
- if 'extraParams' in search_engine:
- del search_engine['extraParams']
+ @classmethod
+ def process_raw(cls, unwrapped_jsons, parsed_schema):
+ changes = []
+
+ for collection in unwrapped_jsons:
+ if collection.path != RemoteSettings.DUMPS_PATH_ABSOLUTE / 'main/example.json':
+ latest_change = {}
+ latest_change['last_modified'] = max(
+ (record['last_modified'] for record in collection.content), default=0)
+ latest_change['bucket'] = collection.path.parent.name
+ latest_change['collection'] = collection.path.stem
+ changes.append(latest_change)
+
+ cls.OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
+
+ return File(cls.OUTPUT_PATH, changes)
+
+
+class SearchConfig(RemoteSettings):
+ JSON_PATHS = (
+ RemoteSettings.DUMPS_PATH_ABSOLUTE /
+ 'main/search-config.json',
+ )
+ SCHEMA_PATH = arguments.MAIN_PATH / \
+ 'toolkit/components/search/schema/search-engine-config-schema.json'
+ OUTPUT_PATH = JSON_PATHS[0]
- general_specifier = {}
- for specifier in search_engine['appliesTo'].copy():
- if 'application' in specifier:
- if 'distributions' in specifier['application']:
- search_engine['appliesTo'].remove(specifier)
- continue
- if 'extraParams' in specifier['application']:
- del specifier['application']['extraParams']
+ _DUCKDUCKGO_SEARCH_ENGINE_ID = 'ddg@search.mozilla.org'
- if 'included' in specifier and 'everywhere' in specifier[
- 'included'] and specifier['included']['everywhere']:
- general_specifier = specifier
+ @classmethod
+ def should_drop_record(cls, search_engine):
+ return search_engine['webExtension']['id'] not in (
+ cls._DUCKDUCKGO_SEARCH_ENGINE_ID, 'wikipedia@search.mozilla.org')
- if not general_specifier:
- general_specifier = {'included': {'everywhere': True}}
- search_engine['appliesTo'].insert(0, general_specifier)
- if search_engine['webExtension']['id'] == _WHITELIST[0]:
- general_specifier['default'] = 'yes'
+ @classmethod
+ def process_record(cls, search_engine):
+ [search_engine.pop(key, None)
+ for key in ['extraParams', 'telemetryId']]
- if clone != search_engine:
- timestamp = int(round(time.time_ns() / 10 ** 6))
- while timestamp in timestamps:
- timestamp += 1
- timestamps.append(timestamp)
- search_engine['last_modified'] = timestamp
+ general_specifier = {}
+ for specifier in search_engine['appliesTo'].copy():
+ if 'application' in specifier:
+ if 'distributions' in specifier['application']:
+ search_engine['appliesTo'].remove(specifier)
+ continue
+ specifier['application'].pop('extraParams', None)
- validate(search_engine, schema=SCHEMA)
+ if 'included' in specifier and 'everywhere' in specifier[
+ 'included'] and specifier['included']['everywhere']:
+ general_specifier = specifier
- search_engines.append(search_engine)
+ if not general_specifier:
+ general_specifier = {'included': {'everywhere': True}}
+ search_engine['appliesTo'].insert(0, general_specifier)
+ if search_engine['webExtension']['id'] == cls._DUCKDUCKGO_SEARCH_ENGINE_ID:
+ general_specifier['default'] = 'yes'
- return File(search_config.path, search_engines)
+ return search_engine
-class TopSites:
+class TippyTopSites:
JSON_PATHS = (
arguments.MAIN_PATH /
'browser/components/newtab/data/content/tippytop/top_sites.json',
@@ -153,15 +178,42 @@ class TopSites:
'tippytop/top_sites.json')
@classmethod
- def process(cls, parsed_jsons):
- main_top_sites = parsed_jsons[0]
- branding_top_sites = parsed_jsons[1]
- result = branding_top_sites.content + \
- [site for site in main_top_sites.content if 'wikipedia.org' in site['domains']]
- return File(main_top_sites.path, result)
+ def process(cls, parsed_jsons, parsed_schema):
+ tippy_top_sites_main = parsed_jsons[0]
+ tippy_top_sites_branding = parsed_jsons[1]
+ result = tippy_top_sites_branding.content + \
+ [site for site in tippy_top_sites_main.content if 'wikipedia.org' in site['domains']]
+ return File(tippy_top_sites_main.path, result)
+
+class TopSites(RemoteSettings):
+ _TOP_SITES_JSON_PATH = 'main/top-sites.json'
+ _TOP_SITES_PATH_MAIN = RemoteSettings.DUMPS_PATH_ABSOLUTE / _TOP_SITES_JSON_PATH
-processors = (SearchConfig, TopSites, RemoteSettings)
+ JSON_PATHS = (
+ arguments.BRANDING_PATH /
+ RemoteSettings.DUMPS_PATH_RELATIVE /
+ _TOP_SITES_JSON_PATH,
+ _TOP_SITES_PATH_MAIN)
+ OUTPUT_PATH = _TOP_SITES_PATH_MAIN
+
+ @classmethod
+ def should_modify_collection(cls, collection):
+ return cls._TOP_SITES_PATH_MAIN == collection.path
+
+ @classmethod
+ def should_drop_record(cls, site):
+ return site['url'] != 'https://www.wikipedia.org/'
+
+ @classmethod
+ def process_record(cls, site):
+ site.pop('exclude_regions', None)
+ return site
+
+
+# To reflect the latest timestamps, Changes class should always come after
+# all other RemoteSettings subclasses
+processors = (TippyTopSites, SearchConfig, TopSites, Changes)
for processor in processors:
parsed_jsons = []
@@ -169,6 +221,11 @@ for processor in processors:
with json_path.open() as file:
parsed_jsons.append(File(json_path, json.load(file)))
- processed = processor.process(parsed_jsons)
+ parsed_schema = None
+ if hasattr(processor, "SCHEMA_PATH"):
+ with processor.SCHEMA_PATH.open() as file:
+ parsed_schema = json.load(file)
+
+ processed = processor.process(parsed_jsons, parsed_schema)
with processed.path.open('w') as file:
json.dump(processed.content, file, indent=arguments.indent)