OpenSecurity/install/web.py-0.37/web/browser.py
author om
Mon, 02 Dec 2013 14:02:05 +0100
changeset 3 65432e6c6042
permissions -rwxr-xr-x
initial deployment and project layout commit
"""Browser to test web applications.
(from web.py)
"""
from utils import re_compile
from net import htmlunquote

import httplib, urllib, urllib2
import copy
from StringIO import StringIO

DEBUG = False

__all__ = [
    "BrowserError",
    "Browser", "AppBrowser",
    "AppHandler"
]

class BrowserError(Exception):
    pass

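# A minimal cookie-aware HTTP client built on urllib2: it keeps the last
# response (url, path, status, data) as state and layers link and form helpers
# on top via BeautifulSoup and ClientForm.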
class Browser:
    def __init__(self):
        import cookielib
        self.cookiejar = cookielib.CookieJar()
        self._cookie_processor = urllib2.HTTPCookieProcessor(self.cookiejar)
        self.form = None

        self.url = "http://0.0.0.0:8080/"
        self.path = "/"

        self.status = None
        self.data = None
        self._response = None
        self._forms = None

    def reset(self):
        """Clears all cookies and history."""
        self.cookiejar.clear()

    def build_opener(self):
        """Builds the opener using urllib2.build_opener.
        Subclasses can override this function to provide custom openers.
        """
        return urllib2.build_opener()

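    # urllib2 raises HTTPError for non-2xx responses, but the error object is
    # itself a usable response, so it is recorded as the current page instead
    # of being propagated.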
    def do_request(self, req):
        if DEBUG:
            print 'requesting', req.get_method(), req.get_full_url()
        opener = self.build_opener()
        opener.add_handler(self._cookie_processor)
        try:
            self._response = opener.open(req)
        except urllib2.HTTPError, e:
            self._response = e

        self.url = self._response.geturl()
        self.path = urllib2.Request(self.url).get_selector()
        self.data = self._response.read()
        self.status = self._response.code
        self._forms = None
        self.form = None
        return self.get_response()

    def open(self, url, data=None, headers={}):
        """Opens the specified url."""
        url = urllib.basejoin(self.url, url)
        req = urllib2.Request(url, data, headers)
        return self.do_request(req)

    def show(self):
        """Opens the current page in real web browser."""
        f = open('page.html', 'w')
        f.write(self.data)
        f.close()

        import webbrowser, os
        url = 'file://' + os.path.abspath('page.html')
        webbrowser.open(url)

    def get_response(self):
        """Returns a copy of the current response."""
        return urllib.addinfourl(StringIO(self.data), self._response.info(), self._response.geturl())

    def get_soup(self):
        """Returns beautiful soup of the current document."""
        import BeautifulSoup
        return BeautifulSoup.BeautifulSoup(self.data)

    def get_text(self, e=None):
        """Returns content of e or the current document as plain text."""
        e = e or self.get_soup()
        return ''.join([htmlunquote(c) for c in e.recursiveChildGenerator() if isinstance(c, unicode)])

    def _get_links(self):
        soup = self.get_soup()
        return [a for a in soup.findAll(name='a')]

    def get_links(self, text=None, text_regex=None, url=None, url_regex=None, predicate=None):
        """Returns all links in the document."""
        return self._filter_links(self._get_links(),
            text=text, text_regex=text_regex, url=url, url_regex=url_regex, predicate=predicate)

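    # follow_link accepts either an explicit link (a BeautifulSoup <a> element)
    # or the same filter keywords as get_links; the first matching link wins.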
    def follow_link(self, link=None, text=None, text_regex=None, url=None, url_regex=None, predicate=None):
        if link is None:
            links = self._filter_links(self.get_links(),
                text=text, text_regex=text_regex, url=url, url_regex=url_regex, predicate=predicate)
            link = links and links[0]

        if link:
            return self.open(link['href'])
        else:
            raise BrowserError("No link found")

    def find_link(self, text=None, text_regex=None, url=None, url_regex=None, predicate=None):
        links = self._filter_links(self.get_links(),
            text=text, text_regex=text_regex, url=url, url_regex=url_regex, predicate=predicate)
        return links and links[0] or None

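    # Builds one predicate per supplied criterion and keeps only the links
    # that satisfy all of them (the criteria are ANDed together).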
    def _filter_links(self, links,
            text=None, text_regex=None,
            url=None, url_regex=None,
            predicate=None):
        predicates = []
        if text is not None:
            predicates.append(lambda link: link.string == text)
        if text_regex is not None:
            predicates.append(lambda link: re_compile(text_regex).search(link.string or ''))
        if url is not None:
            predicates.append(lambda link: link.get('href') == url)
        if url_regex is not None:
            predicates.append(lambda link: re_compile(url_regex).search(link.get('href', '')))
        if predicate:
            predicates.append(predicate)

        def f(link):
            for p in predicates:
                if not p(link):
                    return False
            return True

        return [link for link in links if f(link)]

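    # Forms are parsed lazily with ClientForm and cached until the next
    # request invalidates self._forms.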
    def get_forms(self):
        """Returns all forms in the current document.
        The returned form objects implement the ClientForm.HTMLForm interface.
        """
        if self._forms is None:
            import ClientForm
            self._forms = ClientForm.ParseResponse(self.get_response(), backwards_compat=False)
        return self._forms

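    # A form can be picked by name, by an arbitrary predicate, or by position;
    # the selected form becomes the target of __getitem__/__setitem__ and submit().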
    def select_form(self, name=None, predicate=None, index=0):
        """Selects the specified form."""
        forms = self.get_forms()

        if name is not None:
            forms = [f for f in forms if f.name == name]
        if predicate:
            forms = [f for f in forms if predicate(f)]

        if forms:
            self.form = forms[index]
            return self.form
        else:
            raise BrowserError("No form selected.")

    def submit(self, **kw):
        """Submits the currently selected form."""
        if self.form is None:
            raise BrowserError("No form selected.")
        req = self.form.click(**kw)
        return self.do_request(req)

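    # Item access proxies to the selected form, so field values can be set
    # with b['username'] = 'joe' after select_form().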
    def __getitem__(self, key):
        return self.form[key]

    def __setitem__(self, key, value):
        self.form[key] = value

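# AppBrowser never opens a network socket: build_opener() installs AppHandler,
# which hands every request straight to the web.py application object.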
class AppBrowser(Browser):
    """Browser interface to test web.py apps.

        b = AppBrowser(app)
        b.open('/')
        b.follow_link(text='Login')

        b.select_form(name='login')
        b['username'] = 'joe'
        b['password'] = 'secret'
        b.submit()

        assert b.path == '/'
        assert 'Welcome joe' in b.get_text()
    """
    def __init__(self, app):
        Browser.__init__(self)
        self.app = app

    def build_opener(self):
        return urllib2.build_opener(AppHandler(self.app))

class AppHandler(urllib2.HTTPHandler):
    """urllib2 handler to handle requests using web.py application."""
    handler_order = 100

    def __init__(self, app):
        self.app = app

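    # Instead of going over HTTP, translate the urllib2 request into a call to
    # app.request() and wrap its result as a urllib2-style response.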
    def http_open(self, req):
        result = self.app.request(
            localpart=req.get_selector(),
            method=req.get_method(),
            host=req.get_host(),
            data=req.get_data(),
            headers=dict(req.header_items()),
            https=req.get_type() == "https"
        )
        return self._make_response(result, req.get_full_url())

    def https_open(self, req):
        return self.http_open(req)

    try:
        https_request = urllib2.HTTPHandler.do_request_
    except AttributeError:
        # for python 2.3
        pass

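    # Rebuilds a urllib2-compatible response object from the app's result:
    # headers become an httplib.HTTPMessage, the body a StringIO, and the
    # status line (e.g. "200 OK") is split into code and message.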
    def _make_response(self, result, url):
        data = "\r\n".join(["%s: %s" % (k, v) for k, v in result.header_items])
        headers = httplib.HTTPMessage(StringIO(data))
        response = urllib.addinfourl(StringIO(result.data), headers, url)
        code, msg = result.status.split(None, 1)
        response.code, response.msg = int(code), msg
        return response
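
# Example usage (a sketch; assumes a web.py application object `app`, e.g.
# app = web.application(urls, globals()) defined elsewhere):
#
#     b = AppBrowser(app)
#     b.open('/')
#     print b.status, b.path
#     for link in b.get_links():
#         print link.get('href')
#
#     b.select_form(name='login')   # hypothetical form name
#     b['username'] = 'joe'
#     b.submit()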