Merge pull request #631 from python-cmd2/comment_overhaul

tleonhardt · web-flow · commit eb86c7391875 · 2019-03-04T22:01:27.000-05:00
Comment overhaul
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,6 +11,13 @@
     ``AutoCompleter`` which has since developed a dependency on ``cmd2`` methods. 
     * Removed ability to call commands in ``pyscript`` as if they were functions (e.g ``app.help()``) in favor
     of only supporting one ``pyscript`` interface. This simplifies future maintenance.
+    * No longer supporting C-style comments. Hash (#) is the only valid comment marker.
+    * No longer supporting comments embedded in a command. Only command line input where the first
+    non-whitespace character is a # will be treated as a comment. This means any # character appearing
+    later in the command will be treated as a literal. The same applies to a # in the middle of a multiline
+    command, even if it is the first character on a line.
+        * \# this is a comment
+        * this # is not a comment
 
 ## 0.9.10 (February 22, 2019)
 * Bug Fixes
diff --git a/CODEOWNERS b/CODEOWNERS
@@ -14,15 +14,16 @@
 #docs/*  docs@example.com
 
 # cmd2 code
-cmd2/__init__.py        @tleonhardt @kotfu
-cmd2/arg*.py            @anselor
-cmd2/cmd2.py            @tleonhardt @kmvanbrunt @kotfu
-cmd2/constants.py       @kotfu
-cmd2/parsing.py         @kotfu @kmvanbrunt
-cmd2/pyscript*.py       @anselor
-cmd2/rl_utils.py        @kmvanbrunt
-cmd2/transcript.py      @kotfu
-cmd2/utils.py           @tleonhardt @kotfu @kmvanbrunt
+cmd2/__init__.py           @tleonhardt @kotfu
+cmd2/argparse_completer.py @anselor @kmvanbrunt
+cmd2/clipboard.py          @tleonhardt
+cmd2/cmd2.py               @tleonhardt @kmvanbrunt @kotfu
+cmd2/constants.py          @kotfu
+cmd2/parsing.py            @kotfu @kmvanbrunt
+cmd2/pyscript_bridge.py    @anselor @kmvanbrunt
+cmd2/rl_utils.py           @kmvanbrunt
+cmd2/transcript.py         @kotfu
+cmd2/utils.py              @tleonhardt @kotfu @kmvanbrunt
 
 # Sphinx documentation
 docs/*                  @tleonhardt @kotfu
diff --git a/cmd2/cmd2.py b/cmd2/cmd2.py
@@ -160,7 +160,7 @@ def parse_quoted_string(string: str, preserve_quotes: bool) -> List[str]:
         lexed_arglist = string
     else:
         # Use shlex to split the command line into a list of arguments based on shell rules
-        lexed_arglist = shlex.split(string, posix=False)
+        lexed_arglist = shlex.split(string, comments=False, posix=False)
 
         if not preserve_quotes:
             lexed_arglist = [utils.strip_quotes(arg) for arg in lexed_arglist]
@@ -761,7 +761,7 @@ def tokens_for_completion(self, line: str, begidx: int, endidx: int) -> Tuple[Li
         while True:
             try:
                 # Use non-POSIX parsing to keep the quotes around the tokens
-                initial_tokens = shlex.split(tmp_line[:tmp_endidx], posix=False)
+                initial_tokens = shlex.split(tmp_line[:tmp_endidx], comments=False, posix=False)
 
                 # If the cursor is at an empty token outside of a quoted string,
                 # then that is the token being completed. Add it to the list.
@@ -2283,7 +2283,7 @@ def alias_list(self, args: argparse.Namespace) -> None:
                            "  would for the actual command the alias resolves to.\n"
                            "\n"
                            "Examples:\n"
-                           "  alias ls !ls -lF\n"
+                           "  alias create ls !ls -lF\n"
                            "  alias create show_log !cat \"log file.txt\"\n"
                            "  alias create save_results print_results \">\" out.txt\n")
 
diff --git a/cmd2/constants.py b/cmd2/constants.py
@@ -12,6 +12,7 @@
 REDIRECTION_APPEND = '>>'
 REDIRECTION_CHARS = [REDIRECTION_PIPE, REDIRECTION_OUTPUT]
 REDIRECTION_TOKENS = [REDIRECTION_PIPE, REDIRECTION_OUTPUT, REDIRECTION_APPEND]
+COMMENT_CHAR = '#'
 
 # Regular expression to match ANSI escape codes
 ANSI_ESCAPE_RE = re.compile(r'\x1b[^m]*m')
diff --git a/cmd2/parsing.py b/cmd2/parsing.py
@@ -236,33 +236,6 @@ def __init__(
         else:
             self.shortcuts = shortcuts
 
-        # this regular expression matches C-style comments and quoted
-        # strings, i.e. stuff between single or double quote marks
-        # it's used with _comment_replacer() to strip out the C-style
-        # comments, while leaving C-style comments that are inside either
-        # double or single quotes.
-        #
-        # this big regular expression can be broken down into 3 regular
-        # expressions that are OR'ed together with a pipe character
-        #
-        # /\*.*\*/               Matches C-style comments (i.e. /* comment */)
-        #                        does not match unclosed comments.
-        # \'(?:\\.|[^\\\'])*\'   Matches a single quoted string, allowing
-        #                        for embedded backslash escaped single quote
-        #                        marks.
-        # "(?:\\.|[^\\"])*"      Matches a double quoted string, allowing
-        #                        for embedded backslash escaped double quote
-        #                        marks.
-        #
-        # by way of reminder the (?:...) regular expression syntax is just
-        # a non-capturing version of regular parenthesis. We need the non-
-        # capturing syntax because _comment_replacer() looks at match
-        # groups
-        self.comment_pattern = re.compile(
-            r'/\*.*\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
-            re.DOTALL | re.MULTILINE
-        )
-
         # commands have to be a word, so make a regular expression
         # that matches the first word in the line. This regex has three
         # parts:
@@ -315,6 +288,9 @@ def is_valid_command(self, word: str) -> Tuple[bool, str]:
         if not word:
             return False, 'cannot be an empty string'
 
+        if word.startswith(constants.COMMENT_CHAR):
+            return False, 'cannot start with the comment character'
+
         for (shortcut, _) in self.shortcuts:
             if word.startswith(shortcut):
                 # Build an error string with all shortcuts listed
@@ -338,24 +314,23 @@ def is_valid_command(self, word: str) -> Tuple[bool, str]:
     def tokenize(self, line: str) -> List[str]:
         """Lex a string into a list of tokens.
 
-        Comments are removed, and shortcuts and aliases are expanded.
+        Shortcuts and aliases are expanded first; if the result is a comment, an empty list is returned.
 
         Raises ValueError if there are unclosed quotation marks.
         """
 
-        # strip C-style comments
-        # shlex will handle the python/shell style comments for us
-        line = re.sub(self.comment_pattern, self._comment_replacer, line)
-
         # expand shortcuts and aliases
         line = self._expand(line)
 
+        # check if this line is a comment
+        if line.strip().startswith(constants.COMMENT_CHAR):
+            return []
+
         # split on whitespace
-        lexer = shlex.shlex(line, posix=False)
-        lexer.whitespace_split = True
+        tokens = shlex.split(line, comments=False, posix=False)
 
         # custom lexing
-        tokens = self._split_on_punctuation(list(lexer))
+        tokens = self._split_on_punctuation(tokens)
         return tokens
 
     def parse(self, line: str) -> Statement:
@@ -610,15 +585,6 @@ def _command_and_args(tokens: List[str]) -> Tuple[str, str]:
 
         return command, args
 
-    @staticmethod
-    def _comment_replacer(match):
-        matched_string = match.group(0)
-        if matched_string.startswith('/'):
-            # the matched string was a comment, so remove it
-            return ''
-        # the matched string was a quoted string, return the match
-        return matched_string
-
     def _split_on_punctuation(self, tokens: List[str]) -> List[str]:
         """Further splits tokens from a command line using punctuation characters
 
diff --git a/docs/freefeatures.rst b/docs/freefeatures.rst
@@ -29,23 +29,16 @@ Simply include one command per line, typed exactly as you would inside a ``cmd2`
 Comments
 ========
 
-Comments are omitted from the argument list
-before it is passed to a ``do_`` method.  By
-default, both Python-style and C-style comments
-are recognized. Comments can be useful in :ref:`scripts`, but would
-be pointless within an interactive session.
+Any command line input where the first non-whitespace character is a # will be treated as a comment.
+This means any # character appearing later in the command will be treated as a literal. The same
+applies to a # in the middle of a multiline command, even if it is the first character on a line.
 
-::
-
-    def do_speak(self, arg):
-        self.stdout.write(arg + '\n')
+Comments can be useful in :ref:`scripts`, but would be pointless within an interactive session.
 
 ::
 
-  (Cmd) speak it was /* not */ delicious! # Yuck!
-  it was  delicious!
-
-.. _arg_print: https://github.com/python-cmd2/cmd2/blob/master/examples/arg_print.py
+  (Cmd) # this is a comment
+  (Cmd) this # is not a comment
 
 Startup Initialization Script
 =============================
@@ -209,9 +202,9 @@ is superior for doing this in two primary ways:
 - it has the ability to pass command-line arguments to the scripts invoked
 
 There are no disadvantages to using ``pyscript`` as opposed to ``py run()``.  A simple example
-of using ``pyscript`` is shown below  along with the **examples/arg_printer.py** script::
+of using ``pyscript`` is shown below along with the arg_printer_ script::
 
-    (Cmd) pyscript examples/arg_printer.py foo bar baz
+    (Cmd) pyscript examples/scripts/arg_printer.py foo bar baz
     Running Python script 'arg_printer.py' which was called with 3 arguments
     arg 1: 'foo'
     arg 2: 'bar'
@@ -224,11 +217,12 @@ of using ``pyscript`` is shown below  along with the **examples/arg_printer.py**
 
     When using this decorator, you can then put arguments in quotes like so (NOTE: the ``do_pyscript`` method uses this decorator::
 
-        (Cmd) pyscript examples/arg_printer.py hello '23 fnord'
+        (Cmd) pyscript examples/scripts/arg_printer.py hello '23 fnord'
         Running Python script 'arg_printer.py' which was called with 2 arguments
         arg 1: 'hello'
         arg 2: '23 fnord'
 
+.. _arg_printer: https://github.com/python-cmd2/cmd2/blob/master/examples/scripts/arg_printer.py
 
 IPython (optional)
 ==================
diff --git a/tests/test_argparse.py b/tests/test_argparse.py
@@ -141,10 +141,6 @@ def test_argparse_with_list_and_empty_doc(argparse_app):
     out = run_cmd(argparse_app, 'speak -s hello world!')
     assert out == ['HELLO WORLD!']
 
-def test_argparse_comment_stripping(argparse_app):
-    out = run_cmd(argparse_app, 'speak it was /* not */ delicious! # Yuck!')
-    assert out == ['it was delicious!']
-
 def test_argparser_correct_args_with_quotes_and_midline_options(argparse_app):
     out = run_cmd(argparse_app, "speak 'This  is a' -s test of the emergency broadcast system!")
     assert out == ['THIS  IS A TEST OF THE EMERGENCY BROADCAST SYSTEM!']
diff --git a/tests/test_cmd2.py b/tests/test_cmd2.py
@@ -24,8 +24,7 @@
     from unittest import mock
 
 import cmd2
-from cmd2 import clipboard
-from cmd2 import utils
+from cmd2 import clipboard, constants, utils
 from .conftest import run_cmd, normalize, BASE_HELP, BASE_HELP_VERBOSE, \
     HELP_HISTORY, SHORTCUTS_TXT, SHOW_TXT, SHOW_LONG
 
@@ -1828,6 +1827,7 @@ def test_poutput_color_never(base_app):
 # These are invalid names for aliases and macros
 invalid_command_name = [
     '""',  # Blank name
+    constants.COMMENT_CHAR,
     '!no_shortcut',
     '">"',
     '"no>pe"',
@@ -1900,6 +1900,17 @@ def test_alias_create_with_macro_name(base_app, capsys):
     out, err = capsys.readouterr()
     assert "Alias cannot have the same name as a macro" in err
 
+def test_alias_that_resolves_into_comment(base_app, capsys):
+    # Create the alias
+    out = run_cmd(base_app, 'alias create fake ' + constants.COMMENT_CHAR + ' blah blah')
+    assert out == normalize("Alias 'fake' created")
+
+    # Use the alias
+    run_cmd(base_app, 'fake')
+    out, err = capsys.readouterr()
+    assert not out
+    assert not err
+
 def test_alias_list_invalid_alias(base_app, capsys):
     # Look up invalid alias
     out = run_cmd(base_app, 'alias list invalid')
@@ -2056,6 +2067,17 @@ def test_macro_create_with_missing_unicode_arg_nums(base_app, capsys):
     out, err = capsys.readouterr()
     assert "Not all numbers between 1 and 3" in err
 
+def test_macro_that_resolves_into_comment(base_app, capsys):
+    # Create the macro
+    out = run_cmd(base_app, 'macro create fake {1} blah blah')
+    assert out == normalize("Macro 'fake' created")
+
+    # Use the macro
+    run_cmd(base_app, 'fake ' + constants.COMMENT_CHAR)
+    out, err = capsys.readouterr()
+    assert not out
+    assert not err
+
 def test_macro_list_invalid_macro(base_app, capsys):
     # Look up invalid macro
     run_cmd(base_app, 'macro list invalid')
diff --git a/tests/test_parsing.py b/tests/test_parsing.py