OpenSecurity/install/web.py-0.37/web/browser.py
author om
Mon, 02 Dec 2013 14:02:05 +0100
changeset 3 65432e6c6042
permissions -rwxr-xr-x
initial deployment and project layout commit
     1 """Browser to test web applications.
     2 (from web.py)
     3 """
     4 from utils import re_compile
     5 from net import htmlunquote
     6 
     7 import httplib, urllib, urllib2
     8 import copy
     9 from StringIO import StringIO
    10 
# When true, Browser.do_request prints the method and full url of every
# request before sending it.
DEBUG = False

# Names exported by a star-import of this module.
__all__ = [
    "BrowserError",
    "Browser", "AppBrowser",
    "AppHandler"
]
    18 
    19 class BrowserError(Exception):
    20     pass
    21 
    22 class Browser:
    23     def __init__(self):
    24         import cookielib
    25         self.cookiejar = cookielib.CookieJar()
    26         self._cookie_processor = urllib2.HTTPCookieProcessor(self.cookiejar)
    27         self.form = None
    28 
    29         self.url = "http://0.0.0.0:8080/"
    30         self.path = "/"
    31         
    32         self.status = None
    33         self.data = None
    34         self._response = None
    35         self._forms = None
    36 
    37     def reset(self):
    38         """Clears all cookies and history."""
    39         self.cookiejar.clear()
    40 
    41     def build_opener(self):
    42         """Builds the opener using urllib2.build_opener. 
    43         Subclasses can override this function to prodive custom openers.
    44         """
    45         return urllib2.build_opener()
    46 
    47     def do_request(self, req):
    48         if DEBUG:
    49             print 'requesting', req.get_method(), req.get_full_url()
    50         opener = self.build_opener()
    51         opener.add_handler(self._cookie_processor)
    52         try:
    53             self._response = opener.open(req)
    54         except urllib2.HTTPError, e:
    55             self._response = e
    56 
    57         self.url = self._response.geturl()
    58         self.path = urllib2.Request(self.url).get_selector()
    59         self.data = self._response.read()
    60         self.status = self._response.code
    61         self._forms = None
    62         self.form = None
    63         return self.get_response()
    64 
    65     def open(self, url, data=None, headers={}):
    66         """Opens the specified url."""
    67         url = urllib.basejoin(self.url, url)
    68         req = urllib2.Request(url, data, headers)
    69         return self.do_request(req)
    70 
    71     def show(self):
    72         """Opens the current page in real web browser."""
    73         f = open('page.html', 'w')
    74         f.write(self.data)
    75         f.close()
    76 
    77         import webbrowser, os
    78         url = 'file://' + os.path.abspath('page.html')
    79         webbrowser.open(url)
    80 
    81     def get_response(self):
    82         """Returns a copy of the current response."""
    83         return urllib.addinfourl(StringIO(self.data), self._response.info(), self._response.geturl())
    84 
    85     def get_soup(self):
    86         """Returns beautiful soup of the current document."""
    87         import BeautifulSoup
    88         return BeautifulSoup.BeautifulSoup(self.data)
    89 
    90     def get_text(self, e=None):
    91         """Returns content of e or the current document as plain text."""
    92         e = e or self.get_soup()
    93         return ''.join([htmlunquote(c) for c in e.recursiveChildGenerator() if isinstance(c, unicode)])
    94 
    95     def _get_links(self):
    96         soup = self.get_soup()
    97         return [a for a in soup.findAll(name='a')]
    98         
    99     def get_links(self, text=None, text_regex=None, url=None, url_regex=None, predicate=None):
   100         """Returns all links in the document."""
   101         return self._filter_links(self._get_links(),
   102             text=text, text_regex=text_regex, url=url, url_regex=url_regex, predicate=predicate)
   103 
   104     def follow_link(self, link=None, text=None, text_regex=None, url=None, url_regex=None, predicate=None):
   105         if link is None:
   106             links = self._filter_links(self.get_links(),
   107                 text=text, text_regex=text_regex, url=url, url_regex=url_regex, predicate=predicate)
   108             link = links and links[0]
   109             
   110         if link:
   111             return self.open(link['href'])
   112         else:
   113             raise BrowserError("No link found")
   114             
   115     def find_link(self, text=None, text_regex=None, url=None, url_regex=None, predicate=None):
   116         links = self._filter_links(self.get_links(), 
   117             text=text, text_regex=text_regex, url=url, url_regex=url_regex, predicate=predicate)
   118         return links and links[0] or None
   119             
   120     def _filter_links(self, links, 
   121             text=None, text_regex=None,
   122             url=None, url_regex=None,
   123             predicate=None):
   124         predicates = []
   125         if text is not None:
   126             predicates.append(lambda link: link.string == text)
   127         if text_regex is not None:
   128             predicates.append(lambda link: re_compile(text_regex).search(link.string or ''))
   129         if url is not None:
   130             predicates.append(lambda link: link.get('href') == url)
   131         if url_regex is not None:
   132             predicates.append(lambda link: re_compile(url_regex).search(link.get('href', '')))
   133         if predicate:
   134             predicate.append(predicate)
   135 
   136         def f(link):
   137             for p in predicates:
   138                 if not p(link):
   139                     return False
   140             return True
   141 
   142         return [link for link in links if f(link)]
   143 
   144     def get_forms(self):
   145         """Returns all forms in the current document.
   146         The returned form objects implement the ClientForm.HTMLForm interface.
   147         """
   148         if self._forms is None:
   149             import ClientForm
   150             self._forms = ClientForm.ParseResponse(self.get_response(), backwards_compat=False)
   151         return self._forms
   152 
   153     def select_form(self, name=None, predicate=None, index=0):
   154         """Selects the specified form."""
   155         forms = self.get_forms()
   156 
   157         if name is not None:
   158             forms = [f for f in forms if f.name == name]
   159         if predicate:
   160             forms = [f for f in forms if predicate(f)]
   161             
   162         if forms:
   163             self.form = forms[index]
   164             return self.form
   165         else:
   166             raise BrowserError("No form selected.")
   167         
   168     def submit(self, **kw):
   169         """submits the currently selected form."""
   170         if self.form is None:
   171             raise BrowserError("No form selected.")
   172         req = self.form.click(**kw)
   173         return self.do_request(req)
   174 
   175     def __getitem__(self, key):
   176         return self.form[key]
   177 
   178     def __setitem__(self, key, value):
   179         self.form[key] = value
   180 
   181 class AppBrowser(Browser):
   182     """Browser interface to test web.py apps.
   183     
   184         b = AppBrowser(app)
   185         b.open('/')
   186         b.follow_link(text='Login')
   187         
   188         b.select_form(name='login')
   189         b['username'] = 'joe'
   190         b['password'] = 'secret'
   191         b.submit()
   192 
   193         assert b.path == '/'
   194         assert 'Welcome joe' in b.get_text()
   195     """
   196     def __init__(self, app):
   197         Browser.__init__(self)
   198         self.app = app
   199 
   200     def build_opener(self):
   201         return urllib2.build_opener(AppHandler(self.app))
   202 
   203 class AppHandler(urllib2.HTTPHandler):
   204     """urllib2 handler to handle requests using web.py application."""
   205     handler_order = 100
   206 
   207     def __init__(self, app):
   208         self.app = app
   209 
   210     def http_open(self, req):
   211         result = self.app.request(
   212             localpart=req.get_selector(),
   213             method=req.get_method(),
   214             host=req.get_host(),
   215             data=req.get_data(),
   216             headers=dict(req.header_items()),
   217             https=req.get_type() == "https"
   218         )
   219         return self._make_response(result, req.get_full_url())
   220 
   221     def https_open(self, req):
   222         return self.http_open(req)
   223     
   224     try:
   225         https_request = urllib2.HTTPHandler.do_request_
   226     except AttributeError:
   227         # for python 2.3
   228         pass
   229 
   230     def _make_response(self, result, url):
   231         data = "\r\n".join(["%s: %s" % (k, v) for k, v in result.header_items])
   232         headers = httplib.HTTPMessage(StringIO(data))
   233         response = urllib.addinfourl(StringIO(result.data), headers, url)
   234         code, msg = result.status.split(None, 1)
   235         response.code, response.msg = int(code), msg
   236         return response