#!/usr/bin/env python
# -*- coding: utf8 -*-
# :Copyright: © 2020 Günter Milde.
# :License: Released under the terms of the `2-Clause BSD license`_, in short:
#
#    Copying and distribution of this file, with or without modification,
#    are permitted in any medium without royalty provided the copyright
#    notice and this notice are preserved.
#    This file is offered as-is, without any warranty.
#
# .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause
#
# Revision: $Revision: 8683 $
# Date: $Date: 2021-04-09 17:44:23 +0200 (Fr, 09. Apr 2021) $
"""
A parser for CommonMark MarkDown text using `recommonmark`__.

__ https://pypi.org/project/recommonmark/
"""

import docutils.parsers
from docutils import nodes, Component

try:
    from recommonmark.parser import CommonMarkParser
    # from recommonmark.transform import AutoStructify
except ImportError:
    CommonMarkParser = None

    class Parser(docutils.parsers.Parser):
        """Stand-in parser used when "recommonmark" cannot be imported."""

        def parse(self, inputstring, document):
            """Append a "missing dependency" warning to `document`.

            `inputstring` is ignored: without the upstream parser nothing
            can be parsed, so the warning is the only content added.
            """
            error = document.reporter.warning(
                'Missing dependency: MarkDown input is processed by a 3rd '
                'party parser but Python did not find the required module '
                '"recommonmark" (https://pypi.org/project/recommonmark/).')
            document.append(error)


if CommonMarkParser:
    class Parser(CommonMarkParser):
        """MarkDown parser based on recommonmark."""
        # TODO: settings for AutoStructify
        # settings_spec = docutils.parsers.Parser.settings_spec + (
        # see https://recommonmark.readthedocs.io/en/latest/#autostructify
        supported = ('recommonmark', 'commonmark',
                     'markdown', 'md')
        config_section = 'recommonmark parser'
        config_section_dependencies = ('parsers',)

        # def get_transforms(self):
        #     return Component.get_transforms(self) + [AutoStructify]

        def parse(self, inputstring, document):
            """Use the upstream parser and clean up afterwards.

            `inputstring` is the MarkDown source text, `document` the
            document tree to fill.  `document` must provide a `reporter`
            and the settings `line_length_limit` and `raw_enabled`.

            If any input line is longer than `line_length_limit`, an error
            message is appended to `document` and nothing is parsed.
            Otherwise the text is handed to `CommonMarkParser.parse()`; an
            exception raised there is reported as an error message and the
            post-processing below still runs on whatever was built.

            Returns None; all results are stored in `document`.
            """
            # check for exorbitantly long lines
            line_length_limit = document.settings.line_length_limit
            for i, line in enumerate(inputstring.split('\n')):
                if len(line) > line_length_limit:
                    error = document.reporter.error(
                        'Line %d exceeds the line-length-limit.'%(i+1))
                    document.append(error)
                    return

            # pass to upstream parser
            try:
                CommonMarkParser.parse(self, inputstring, document)
            except Exception as err:
                # Deliberately broad: any failure of the 3rd party parser
                # is turned into a system message instead of a traceback.
                error = document.reporter.error('Parsing with "recommonmark" '
                                                'returned the error:\n%s'%err)
                document.append(error)

            # Post-Processing
            # ---------------

            # merge adjoining Text nodes:
            for node in document.traverse(nodes.TextElement):
                children = node.children
                i = 0
                while i+1 < len(children):
                    if (isinstance(children[i], nodes.Text)
                            and isinstance(children[i+1], nodes.Text)):
                        # Do not advance `i`: the merged node may be
                        # followed by yet another Text node.
                        children[i] = nodes.Text(children[i]+children.pop(i+1))
                        children[i].parent = node
                    else:
                        i += 1

            # add "code" class argument to inline literal (code spans)
            for node in document.traverse(
                    lambda n: isinstance(n, (nodes.literal,
                                             nodes.literal_block))):
                node['classes'].append('code')
            # move "language" argument to classes
            for node in document.traverse(nodes.literal_block):
                if 'language' in node.attributes:
                    node['classes'].append(node['language'])
                    del node['language']

            # remove empty target nodes
            # (iterate over a snapshot: nodes are removed from the tree
            # inside the loop and traverse() may return a lazy iterator
            # in some Docutils releases)
            for node in list(document.traverse(nodes.target)):
                # remove empty name
                node['names'] = [v for v in node['names'] if v]
                if node.children or any(node.attributes.values()):
                    continue
                node.parent.remove(node)

            # replace raw nodes if raw is not allowed
            if not document.settings.raw_enabled:
                # snapshot: nodes are replaced inside the loop
                for node in list(document.traverse(nodes.raw)):
                    warning = document.reporter.warning('Raw content disabled.')
                    node.parent.replace(node, warning)

            # fix section nodes
            # (snapshot: system messages are inserted inside the loop)
            for node in list(document.traverse(nodes.section)):
                # remove spurious IDs (first may be from duplicate name)
                if len(node['ids']) > 1:
                    node['ids'].pop()
                # fix section levels
                section_level = self.get_section_level(node)
                if node['level'] != section_level:
                    warning = document.reporter.warning(
                        'Title level inconsistent. Changing from %d to %d.'
                        %(node['level'], section_level),
                        nodes.literal_block('', node[0].astext()))
                    node.insert(1, warning)
                # remove non-standard attribute "level"
                del node['level'] # TODO: store the original md level somewhere

        def get_section_level(self, node):
            """Return the nesting level of `node` in the document tree.

            The level is 1 plus the number of `nodes.section` ancestors
            found between `node` and the enclosing `nodes.document`.
            If `node` is not attached to a document, counting stops at the
            topmost ancestor instead of failing on a missing parent.
            """
            level = 1
            ancestor = node.parent
            while (ancestor is not None
                   and not isinstance(ancestor, nodes.document)):
                if isinstance(ancestor, nodes.section):
                    level += 1
                ancestor = ancestor.parent
            return level