plugins/externaltools/tools/linkparsing.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232

# -*- coding: utf-8 -*-
#
#    Copyright (C) 2009-2010  Per Arneng <per.arneng@anyplanet.com>
#    Copyright (C) 2012-2021 MATE Developers
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

import re

class Link:
    """
    This class represents a file link from within a string given by the
    output of some software tool. A link contains a reference to a file, the
    line number within the file and the boundaries within the given output
    string that should be marked as a link.
    """

    def __init__(self, path, line_nr, start, end):
        """
        path -- the path of the file (that could be extracted)
        line_nr -- the line nr of the specified file
        start -- the index within the string that the link starts at
        end -- the index within the string where the link ends at
        """
        self.path    = path
        self.line_nr = int(line_nr)
        self.start   = start
        self.end     = end

    def __repr__(self):
        return "%s[%s](%s:%s)" % (self.path, self.line_nr,
                                  self.start, self.end)

class LinkParser:
    """
    Parses a text using different parsing providers with the goal of finding one
    or more file links within the text. A typical example could be the output
    from a compiler that specifies an error in a specific file. The path of the
    file, the line nr and some more info is then returned so that it can be used
    to be able to navigate from the error output in to the specific file.

    The actual work of parsing the text is done by instances of classes that
    inherits from AbstractLinkParser or by regular expressions. To add a new
    parser just create a class that inherits from AbstractLinkParser and then
    register in this class cunstructor using the method add_parser. If you want
    to add a regular expression then just call add_regexp in this class
    constructor and provide your regexp string as argument.
    """

    def __init__(self):
        self._providers = []
        self.add_regexp(REGEXP_STANDARD)
        self.add_regexp(REGEXP_PYTHON)
        self.add_regexp(REGEXP_VALAC)
        self.add_regexp(REGEXP_BASH)
        self.add_regexp(REGEXP_RUBY)
        self.add_regexp(REGEXP_PERL)
        self.add_regexp(REGEXP_MCS)

    def add_parser(self, parser):
        self._providers.append(parser)

    def add_regexp(self, regexp):
        """
        Adds a regular expression string that should match a link using
        re.MULTILINE and re.VERBOSE regexp. The area marked as a link should
        be captured by a group named lnk. The path of the link should be
        captured by a group named pth. The line number should be captured by
        a group named ln. To read more about this look at the documentation
        for the RegexpLinkParser constructor.
        """
        self.add_parser(RegexpLinkParser(regexp))

    def parse(self, text):
        """
        Parses the given text and returns a list of links that are parsed from
        the text. This method delegates to parser providers that can parse
        output from different kinds of formats. If no links are found then an
        empty list is returned.

        text -- the text to scan for file links. 'text' can not be None.
        """
        if text is None:
            raise ValueError("text can not be None")

        links = []

        for provider in self._providers:
            links.extend(provider.parse(text))

        return links

class AbstractLinkParser(object):
    """The "abstract" base class for link parses"""

    def parse(self, text):
        """
        This method should be implemented by subclasses. It takes a text as
        argument (never None) and then returns a list of Link objects. If no
        links are found then an empty list is expected. The Link class is
        defined in this module. If you do not override this method then a
        NotImplementedError will be thrown.

        text -- the text to parse. This argument is never None.
        """
        raise NotImplementedError("need to implement a parse method")

class RegexpLinkParser(AbstractLinkParser):
    """
    A class that represents parsers that only use one single regular expression.
    It can be used by subclasses or by itself. See the constructor documentation
    for details about the rules surrouning the regexp.
    """

    def __init__(self, regex):
        """
        Creates a new RegexpLinkParser based on the given regular expression.
        The regular expression is multiline and verbose (se python docs on
        compilation flags). The regular expression should contain three named
        capturing groups 'lnk', 'pth' and 'ln'. 'lnk' represents the area wich
        should be marked as a link in the text. 'pth' is the path that should
        be looked for and 'ln' is the line number in that file.
        """
        self.re = re.compile(regex, re.MULTILINE | re.VERBOSE)

    def parse(self, text):
        links = []
        for m in re.finditer(self.re, text):
            path = m.group("pth")
            line_nr = m.group("ln")
            start = m.start("lnk")
            end = m.end("lnk")
            link = Link(path, line_nr, start, end)
            links.append(link)

        return links

# gcc 'test.c:13: warning: ...'
# javac 'Test.java:13: ...'
# ruby 'test.rb:5: ...'
# scalac 'Test.scala:5: ...'
# 6g (go) 'test.go:9: ...'
REGEXP_STANDARD = r"""
^
(?P<lnk>
    (?P<pth> .*[a-z0-9] )
    \:
    (?P<ln> \d+)
)
\:\s"""

# python '  File "test.py", line 13'
REGEXP_PYTHON = r"""
^\s\sFile\s
(?P<lnk>
    \"
    (?P<pth> [^\"]+ )
    \",\sline\s
    (?P<ln> \d+ )
),"""

# python 'test.sh: line 5:'
REGEXP_BASH = r"""
^(?P<lnk>
    (?P<pth> .* )
    \:\sline\s
    (?P<ln> \d+ )
)\:"""

# valac 'Test.vala:13.1-13.3: ...'
REGEXP_VALAC = r"""
^(?P<lnk>
    (?P<pth>
        .*vala
    )
    \:
    (?P<ln>
        \d+
    )
    \.\d+-\d+\.\d+
 )\: """

#ruby
#test.rb:5: ...
#   from test.rb:3:in `each'
# fist line parsed by REGEXP_STANDARD
REGEXP_RUBY = r"""
^\s+from\s
(?P<lnk>
    (?P<pth>
        .*
    )
    \:
    (?P<ln>
        \d+
    )
 )"""

# perl 'syntax error at test.pl line 88, near "$fake_var'
REGEXP_PERL = r"""
\sat\s
(?P<lnk>
    (?P<pth> .* )
    \sline\s
    (?P<ln> \d+ )
)"""

# mcs (C#) 'Test.cs(12,7): error CS0103: The name `fakeMethod'
REGEXP_MCS = r"""
^
(?P<lnk>
    (?P<pth> .*\.[cC][sS] )
    \(
    (?P<ln> \d+ )
    ,\d+\)
)
\:\s
"""

# ex:ts=4:et: