Quixote 1.2源码解读
要说quixote1.2真是老古董了,官网都已经不提供下载了(只维护1.3和2.x)。鉴于历史原因,公司暂时是不会做quixote升级的,而且就目前来看,完全没有问题。
下载源代码,主要看publish.py
有个publish方法:
def publish(self, stdin, stdout, stderr, env):
    """publish(stdin : file, stdout : file, stderr : file, env : dict)

    Build a request object from the environment and standard input,
    process it, and write the resulting response to `stdout`.

    `stdin` and `stdout` are supplied by whatever front end is running
    Quixote: under gunicorn, for example, `stdin` is an object provided
    by gunicorn, while under plain CGI it is the process's own standard
    input.  `env` comes from CGI.  `stderr` is accepted but not used by
    this method.
    """
    req = self.create_request(stdin, env)
    body = self.process_request(req, env)

    # Only replace the response body when processing produced output.
    if body:
        req.response.set_body(body)

    # A failed write is logged rather than propagated.
    try:
        req.response.write(stdout)
    except IOError as err:
        self.log('IOError caught while writing request (%s)' % err)

    self._clear_request()
可以简单看一下create_request方法:
def create_request(self, stdin, env):
    """Return the request object for this call.

    An HTTPUploadRequest (with its upload directory configured from
    self.config) is returned for "multipart/form-data" requests; every
    other request gets a plain HTTPRequest.
    """
    ctype = get_content_type(env)

    # Workaround for a Safari bug: a multipart GET that declares an empty
    # body is handled as an ordinary request, not as an upload.
    if ctype != "multipart/form-data" or (
            env.get('REQUEST_METHOD') == 'GET'
            and env.get('CONTENT_LENGTH', '0') == '0'):
        return HTTPRequest(stdin, env, content_type=ctype)

    upload = HTTPUploadRequest(stdin, env, content_type=ctype)
    upload.set_upload_dir(self.config.upload_dir,
                          self.config.upload_dir_mode)
    return upload
然后核心就是process_request方法:
def process_request(self, request, env):
    """process_request(request : HTTPRequest, env : dict) : string

    Process a single request, given an HTTPRequest object, and return
    its output.

    The request is parsed and handed to try_publish() together with
    env['PATH_INFO'] (an empty string when that key is missing).
    Exceptions are handled here: a PublishError is turned into output by
    finish_interrupted_request(), and anything else by
    finish_failed_request().  Whatever the outcome, the output is passed
    through filter_output() and the request is logged before returning.
    """
    self._set_request(request)
    try:
        self.parse_request(request)
        output = self.try_publish(request, env.get('PATH_INFO', ''))
    except errors.PublishError, exc:
        # Exit the publishing loop and return a result right away.
        output = self.finish_interrupted_request(request, exc)
    except:
        # Some other exception, generate error messages to the logs, etc.
        # The bare except is deliberate: this is the last line of defence
        # for the request, so nothing is allowed to escape from here.
        output = self.finish_failed_request(request)
    # Runs for the success path and for both failure paths.
    output = self.filter_output(request, output)
    self.log_request(request)
    return output
先看看parse_request方法,调用了http_request.py中的代码:
def process_inputs(self):
    """Process request inputs.

    Records the start time, then feeds every parsed form field to
    add_form_value().  For non-GET requests the body is only read when
    the content type is "application/x-www-form-urlencoded"; other
    content types are left untouched, and "multipart/form-data" raises
    RuntimeError.
    """
    self.start_time = time.time()

    body = None
    if self.get_method() != 'GET':
        # Avoid consuming the contents of stdin unless we're sure
        # there's actually form data.
        if self.content_type == "multipart/form-data":
            raise RuntimeError(
                "cannot handle multipart/form-data requests")
        if self.content_type != "application/x-www-form-urlencoded":
            return
        body = self.stdin

    fields = FieldStorage(fp=body, environ=self.environ,
                          keep_blank_values=1)
    # fields.list may be None or empty; both mean "no form values".
    for field in fields.list or ():
        self.add_form_value(field.name, field.value)
注意add_form_value方法中防止hash攻击:
def add_form_value(self, key, value):
    """Store one form field in self.form.

    The first value seen for a key is stored as is.  A later, different
    value turns the entry into a list; further values are appended to
    that list.  A repeated value equal to a single stored value is
    ignored.

    Raises errors.RequestError("hash attack") when the form keys look
    like a hash-collision attack.
    """
    form = self.form

    if key not in form:
        form[key] = value

        # anti hash attack:
        # http://permalink.gmane.org/gmane.comp.security.full-disclosure/83694
        # Checked each time the number of keys reaches a multiple of 50.
        # The key set only changes when a new key is inserted, so this
        # is the only place the check needs to run.
        total = len(form)
        if total % 50 == 0:
            collisions = {}
            for name in form:
                code = hash(name)
                collisions[code] = collisions.get(code, 0) + 1
            worst = max(collisions.values())
            # total is a multiple of 50 here, so the division is exact.
            if worst > total // 10 or worst > 10 or total > 10000:
                raise errors.RequestError("hash attack")
        return

    existing = form[key]
    if type(existing) is list:
        existing.append(value)
    elif existing != value:
        form[key] = [existing, value]
回到publish.py中,先看finish_interrupted_request方法:
def finish_interrupted_request(self, request, exc):
    """Produce the output for a request interrupted by a PublishError.

    The response is replaced by a fresh HTTPResponse carrying the
    exception's status code.  The namespaces visited during traversal
    are then searched, most recent first, for a _q_exception_handler;
    errors.default_exception_handler is used once none is left.  If a
    handler itself raises PublishError the search continues with the
    namespaces further up.
    """
    request.response = HTTPResponse()

    # Set the status here so custom handlers don't each have to do it.
    request.response.set_status(exc.status_code)

    if self.config.secure_errors and exc.private_msg:
        exc.private_msg = None  # hide it

    # Work on a copy: the search consumes the stack as it walks up.
    remaining = self.namespace_stack[:]
    while True:
        handler = errors.default_exception_handler
        while remaining:
            namespace = remaining.pop()
            if hasattr(namespace, "_q_exception_handler"):
                handler = namespace._q_exception_handler
                break

        try:
            return handler(request, exc)
        except errors.PublishError:
            # The exception was re-raised, or another one occurred: try
            # the next handler up.  The default handler is the last
            # resort and must never raise PublishError itself.
            assert handler is not errors.default_exception_handler
当发生PublishError的时候,从namespace_stack(后面有解释)后面的元素(任何namespace,可以是object,module)开始依次往前找_q_exception_handler,找到则进行异常处理,遍历完namespace_stack后都没找到则使用errors.py中默认的default_exception_handler。如果找到了且在处理的过程中继续抛出了PublishError,则继续往前寻找_q_exception_handler。但是,要保证不是default_exception_handler抛出的PublishError,因为默认的异常处理方法是(确保)不可能出现PublishError的(如果发生了,也没有别的异常处理代码能解决了)。
filter_output方法中判断如果支持response内容的压缩,则调用compress_output:
def compress_output(self, request, output):
    """Gzip `output` when the client accepts it and it is large enough.

    `request.get_encoding()` is asked which of "gzip" / "x-gzip" the
    client accepts.  If it returns a false value, or `output` is not
    longer than self._GZIP_THRESHOLD, `output` is returned unchanged.
    Otherwise the compressed body is returned and the response's
    Content-Encoding header is set to the chosen encoding.
    """
    encoding = request.get_encoding(["gzip", "x-gzip"])
    if not encoding or len(output) <= self._GZIP_THRESHOLD:
        return output

    # Negative wbits produces a raw deflate stream without the zlib
    # header/trailer; the gzip framing is added by hand around it.
    co = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS,
                          zlib.DEF_MEM_LEVEL, 0)

    # The gzip trailer holds CRC32 and the input size as *unsigned*
    # 32-bit values (size modulo 2**32).  binascii.crc32() is signed on
    # some Python versions and unsigned on others, so mask it to get the
    # same value everywhere instead of packing it as a signed long.
    trailer = struct.pack("<LL",
                          binascii.crc32(output) & 0xffffffff,
                          len(output) & 0xffffffff)

    # self._GZIP_HEADER goes first (expected to hold the fixed gzip
    # header bytes).  b"" is the same as "" on Python 2.
    compressed = b"".join([self._GZIP_HEADER,
                           co.compress(output),
                           co.flush(),
                           trailer])
    request.response.set_header("Content-Encoding", encoding)
    return compressed
这里有个threshold,只有内容大于200字节才做压缩,否则认为不划算。
至此就剩下一个最关键的try_publish方法还没看了:
def try_publish(self, request, path):
    """Traverse `path` and return the output of the object found there.

    Returns None when traversal already issued a redirect.  A string
    result of the traversal is returned as is; a callable is called
    with the request and its return value is used.

    Raises RuntimeError when the traversed object is neither a string
    nor callable, or when the callable returns None.
    """
    self.start_request(request)

    # Filled in during traversal with every namespace that was visited.
    self.namespace_stack = []

    # Traverse package to a (hopefully-) callable object
    target = _traverse_url(self.root_namespace, path, request,
                           self.config.fix_trailing_slash,
                           self.namespace_stack)

    # None means no output -- traverse_url() just issued a redirect.
    if target is None:
        return None

    if isstring(target):
        result = target
    else:
        # Not a string, so it has to be callable.
        if not (callable(target) or hasattr(target, "__call__")):
            raise RuntimeError(
                "object is neither callable nor a string: %s" % repr(target))

        try:
            if callable(target):
                result = target(request)
            else:
                result = target.__call__(request)
        except SystemExit:
            # Report the shutdown as this request's output and flag the
            # publisher to exit.
            result = "SystemExit exception caught, shutting down"
            self.log(result)
            self.exit_now = 1

        if result is None:
            raise RuntimeError('callable %s returned None' % repr(target))

    # The callable ran OK, commit any changes to the session
    self.finish_successful_request(request)
    return result
准确来说,quixote是按namespace来分发请求的,而不是纯文件目录。如果访问www.site.com/blog/articles/100001/,怎样做url分发呢?
url分发的目标就是找到controller层的一个方法(可调用对象),调用然后得到请求的输出。在quixote中,www.site.com/blog/articles/100001/的request_path="blog/articles/100001/",于是以斜杠为分隔符,依次寻找namespace直到找到目标调用对象。于是先在controller层的根目录(有的是应用的根目录,视项目情况定)寻找blog,找到blog对应的namespace A,然后在A下寻找articles,假设为B,然后在B中寻找100001……
从代码中可以看出,寻找object对象是由_traverse_url方法来完成的:
def _traverse_url(root_namespace, path, request, fix_trailing_slash,
                  namespace_stack):
    """Walk `path` component by component, starting at `root_namespace`.

    Parameters:
      root_namespace -- object the traversal starts from
      path -- request path, e.g. "/foo/bar/"
      request -- used for redirects and to read the method, path and
        query string
      fix_trailing_slash -- when true, GET requests with a missing or
        superfluous trailing slash are redirected to the repaired URL
      namespace_stack -- list that receives the root namespace and, via
        _get_component(), every object reached along the way

    Returns the object found at the end of the path, or None when a
    redirect was issued instead.

    Raises errors.TraversalError when the final object is neither
    callable nor a string.
    """
    # An empty path is redirected to the script root with a slash added.
    if (not path and fix_trailing_slash):
        request.redirect(request.environ['SCRIPT_NAME'] + '/',
                         permanent=1)
        return None

    # replace repeated slashes with a single slash
    if path.find("//") != -1:
        path = _slash_pat.sub("/", path)

    # split path apart; /foo/bar/baz  -> ['foo', 'bar', 'baz']
    #                   /foo/bar/     -> ['foo', 'bar', '']
    path_components = path[1:].split('/')

    # Traverse starting at the root
    object = root_namespace
    namespace_stack.append(object)

    # Loop over the components of the path
    for component in path_components:
        if component == "":
            # "/q/foo/" == "/q/foo/_q_index"
            # If what we have reached so far is already a callable or a
            # string, the slash is superfluous: redirect GET requests to
            # the path without its last character, keeping the query
            # string.
            if (callable(object) or isstring(object)) and \
                    request.get_method() == "GET" and fix_trailing_slash:
                query = request.environ.get('QUERY_STRING', '')
                # Prefix with "?" only when there is a query string.
                query = query and "?" + query
                request.redirect(request.get_path()[:-1] + query,
                                 permanent=1)
                return None
            component = "_q_index"
        object = _get_component(object, component, path, request,
                                namespace_stack)

    if not (isstring(object) or callable(object)
            or hasattr(object, '__call__')):
        # We went through all the components of the path and ended up at
        # something which isn't callable, like a module or an instance
        # without a __call__ method.
        if path[-1] != '/':
            # Probe for an index below the object; if there is one, the
            # URL was merely missing its trailing slash.
            _obj = _get_component(object, "_q_index", path, request,
                                  namespace_stack)
            if (callable(_obj) or isstring(_obj)) and \
                    request.get_method() == "GET" and fix_trailing_slash:
                # This is for the convenience of users who type in paths.
                # Repair the path and redirect.  This should not happen for
                # URLs within the site.
                query = request.environ.get('QUERY_STRING', '')
                query = query and "?" + query
                request.redirect(request.get_path() + "/" + query,
                                 permanent=1)
                return None
        raise errors.TraversalError(
            "object is neither callable nor string",
            private_msg=repr(object),
            path=path)

    return object
可见,如果component是""(请求以/结尾),则试着寻找_q_index方法。这里做了一个patch,如果fix_trailing_slash等条件为true,则重定向。
核心方法还是_get_component
def _get_component(container, component, path, request, namespace_stack):
    """Resolve one URL path component inside `container`.

    The container must have a _q_exports list.  `component` is mapped to
    an internal attribute name (directly, or through an
    (external, internal) tuple in _q_exports) and then looked up on the
    container, falling back to _q_lookup / _q_getname, _q_resolve or a
    submodule import as appropriate.  The object found is appended to
    `namespace_stack` and returned.

    Raises errors.TraversalError when the component cannot be resolved,
    errors.AccessError when '_q_index' is requested but unavailable, and
    whatever container._q_access() raises to deny access.
    """
    if not hasattr(container, '_q_exports'):
        raise errors.TraversalError(
            private_msg="%r has no _q_exports list" % container)

    # will raise AccessError if access failed
    if hasattr(container, '_q_access'):
        container._q_access(request)

    # Translate the external (URL) name into the attribute name; it stays
    # None when the component is not exported at all.
    exported = container._q_exports
    internal_name = None
    if component in exported or component == '_q_index':
        internal_name = component
    else:
        # check for an explicit external to internal mapping
        for entry in exported:
            if type(entry) is tuple and entry[0] == component:
                internal_name = entry[1]
                break

    if internal_name is None:
        # Component is not in exports list: only a dynamic lookup hook
        # can still supply it.
        found = None
        if hasattr(container, "_q_lookup"):
            found = container._q_lookup(request, component)
        elif hasattr(container, "_q_getname"):
            warnings.warn("_q_getname() on %s used; should "
                          "be replaced by _q_lookup()" % type(container))
            found = container._q_getname(request, component)
        if found is None:
            raise errors.TraversalError(
                private_msg="object %r has no attribute %r" % (
                    container, component))

    # From here on, internal_name is known not to be None.
    elif hasattr(container, internal_name):
        # attribute is in _q_exports and exists
        found = getattr(container, internal_name)

    elif internal_name == '_q_index':
        if not hasattr(container, "_q_lookup"):
            raise errors.AccessError(
                private_msg=("_q_index not found in %r" % container))
        found = container._q_lookup(request, "")

    elif hasattr(container, "_q_resolve"):
        found = container._q_resolve(internal_name)
        if found is None:
            raise RuntimeError("component listed in _q_exports, "
                               "but not returned by _q_resolve(%r)"
                               % internal_name)
        # Set the object, so _q_resolve won't need to be called again.
        setattr(container, internal_name, found)

    elif type(container) is types.ModuleType:
        # Exported name that is not an attribute yet: fetch it as a
        # submodule of the container.
        found = _get_module(container.__name__ + '.' + internal_name)

    else:
        raise errors.TraversalError(
            private_msg=("%r in _q_exports list, "
                         "but not found in %r" % (component, container)))

    namespace_stack.append(found)
    return found
这里都是quixote的语法,没什么好说的了。
贴的代码跟官方quixote1.2的代码有些出入,都是一些patch。
可以看看SessionManager,还不错的。