Source code for boussole.parser

# -*- coding: utf-8 -*-
"""
.. _SASS Reference:
    http://sass-lang.com/documentation/file.SASS_REFERENCE.html#import

Parser
======

The parser is in charge of finding every ``@import`` rule in the given SASS content.

It has been built following the `SASS Reference`_ about the ``@import`` rule.
"""
import re

from boussole.exceptions import InvalidImportRule


class ScssImportsParser(object):
    """
    SCSS parser to find import rules.

    This does not support the old SASS syntax (also known as "indented
    syntax").

    It is a mixin: it defines no ``__init__`` method of its own, so it should
    be safe enough to inherit it from another class.

    Attributes:
        REGEX_IMPORT_RULE: Compiled regex used to find import rules. It
            captures two groups per rule: the optional ``url`` keyword and
            the raw (still quoted, possibly comma separated) paths.
        REGEX_COMMENTS: Compiled regex used to find and remove comments.
    """
    # Plain raw string: the ``ur''`` prefix is a syntax error on Python 3 and
    # the pattern is pure ASCII, so it matches text the same way.
    REGEX_IMPORT_RULE = re.compile(r'@import\s*(url)?\s*\(?([^;]+?)\)?;',
                                   re.IGNORECASE)
    # Second part (for single line comments) contains a negative lookbehind
    # assertion to avoid matching on an url protocol (http://), which would
    # cause issues in parsing
    REGEX_COMMENTS = re.compile(r'(/\*.*?\*/)|((?<!(:))//.*?(\n|$))',
                                re.IGNORECASE | re.DOTALL)

    def strip_quotes(self, content):
        """
        Unquote given rule.

        Args:
            content (str): An import rule.

        Raises:
            InvalidImportRule: Raise exception if the rule is badly quoted
                (not started or not ended quotes). A rule made of a lone
                quote character is considered badly quoted.

        Returns:
            string: The given rule unquoted. A rule without any surrounding
            quote is returned unchanged.
        """
        error_msg = "Following rule is badly quoted: {}"

        for quote in ('"', "'"):
            starts = content.startswith(quote)
            ends = content.endswith(quote)
            # At least two characters are required, otherwise a lone quote
            # would count as both the opening and the closing quote
            if starts and ends and len(content) >= 2:
                return content[1:-1]
            # Quote starting but not ended, or ending but not started
            if starts or ends:
                raise InvalidImportRule(error_msg.format(content))

        return content

    def remove_comments(self, content):
        """
        Remove all comment kinds (inline and multiline) from given content.

        Args:
            content (str): A SCSS source.

        Returns:
            string: Given SCSS source with all comments removed.
        """
        return self.REGEX_COMMENTS.sub("", content)

    def filter_rules(self, path):
        """
        Predicate used to filter out import paths that:

        * Start with ``http://`` or ``https://`` (external load only);
        * End with ``.css`` (they are not intended to be compiled).

        Args:
            path (str): An unquoted import path.

        Returns:
            bool: ``True`` if the path has to be kept, ``False`` if it has
            to be ignored.
        """
        return not (path.startswith(('http://', 'https://')) or
                    path.endswith('.css'))

    def flatten_rules(self, declarations):
        """
        Flatten returned import rules from regex.

        Because an import rule can contain multiple items (comma separated
        paths in the same rule), every declaration found by
        ``REGEX_IMPORT_RULE`` is split into its unquoted items.

        Args:
            declarations (list): List of ``(protocole, paths)`` tuples as
                returned by ``REGEX_IMPORT_RULE.findall()``.

        Raises:
            InvalidImportRule: If one of the items is badly quoted.

        Returns:
            list: Unquoted import paths, without the ones rejected by
            ``filter_rules``.
        """
        rules = []

        for protocole, paths in declarations:
            # If there is a protocole (like 'url'), drop the whole rule
            if protocole:
                continue
            # Unquote and possibly split multiple items in the same
            # declaration
            rules.extend(self.strip_quotes(item.strip())
                         for item in paths.split(','))

        # Build a real list: ``filter()`` is a lazy iterator on Python 3
        return [rule for rule in rules if self.filter_rules(rule)]

    def parse(self, content):
        """
        Parse a stylesheet document with a regex (``REGEX_IMPORT_RULE``) to
        extract all import rules and return them.

        Args:
            content (str): A SCSS source.

        Raises:
            InvalidImportRule: If an import rule is badly quoted.

        Returns:
            list: Paths found in import rules.
        """
        # Remove all comments before searching for import rules, so that
        # commented out or broken import rules are not caught
        declarations = self.REGEX_IMPORT_RULE.findall(
            self.remove_comments(content)
        )
        return self.flatten_rules(declarations)