diff --git a/splash/benchmark/benchmark.py b/splash/benchmark/benchmark.py index 2ba766fc7..b69a5209e 100755 --- a/splash/benchmark/benchmark.py +++ b/splash/benchmark/benchmark.py @@ -20,7 +20,7 @@ import re import requests -from splash.benchmark.file_server import serve_files +from splash.benchmark.download_sites import serve_files from splash.tests.utils import SplashServer @@ -68,8 +68,10 @@ def make_render_png_lua_req(splash, params): PORT = 8806 #: Combinations of width & height to test. WIDTH_HEIGHT = [(None, None), (500, None), (None, 500), (500, 500)] -#: Splash log filename. +#: Splash log filename (set to None to put it to stdout). SPLASH_LOG = 'splash.log' +#: Static file server log filename (set to None to put it to stdout). +FILESERVER_LOG = 'fileserver.log' #: This script is used to collect maxrss & cpu time from splash process. GET_PERF_STATS_SCRIPT = """ function main(splash) @@ -172,7 +174,8 @@ def main(): '--disable-xvfb', '--max-timeout=600']) - with splash, serve_files(PORT, args.sites_dir): + with splash, serve_files(port=PORT, directory=args.sites_dir, + logfile=FILESERVER_LOG): start_time = time() results = parallel_map(invoke_request, generate_requests(splash, args), args.thread_count) diff --git a/splash/benchmark/download_sites.py b/splash/benchmark/download_sites.py index e50fee33f..616e270e7 100755 --- a/splash/benchmark/download_sites.py +++ b/splash/benchmark/download_sites.py @@ -4,6 +4,7 @@ Site downloader script for Splash benchmark suite. """ +from contextlib import contextmanager from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter import errno import json @@ -11,11 +12,12 @@ import re import subprocess from urlparse import urlsplit +import time +import requests from lxml import html import w3lib.html -from splash.benchmark.file_server import serve_files from splash.tests.stress import lua_runonce SCRIPT_HTML = """ @@ -72,6 +74,31 @@ def preprocess_main_page(sites_dir, url): return filename +@contextmanager +def serve_files(port, directory, logfile=None): + """Serve files from specified directory statically in a subprocess.""" + command = ['twistd', + '-n', # don't daemonize + 'web', # start web component + '--port', str(int(port)), + '--path', os.path.abspath(directory), ] + if logfile is not None: + command += ['--logfile', logfile] + site_server = subprocess.Popen(command) + try: + # It might take some time to bring up the server, wait for up to 10s. + for i in xrange(100): + try: + requests.get('http://localhost:%d' % port) + except requests.ConnectionError: + time.sleep(0.1) + else: + break + yield + finally: + site_server.terminate() + + def download_sites(sites_dir, sites): local_files = [preprocess_main_page(sites_dir, s) for s in sites] diff --git a/splash/benchmark/file_server.py b/splash/benchmark/file_server.py deleted file mode 100755 index 2931f41ae..000000000 --- a/splash/benchmark/file_server.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python - -""" -Simple static file server. -""" - -import argparse -import os -import SimpleHTTPServer -import SocketServer -import subprocess -from contextlib import contextmanager - - -parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument('port', type=int, help='Port number to listen at') -parser.add_argument('directory', type=str, help='Directory to serve') - - -class ReusingTCPServer(SocketServer.TCPServer): - allow_reuse_address = True - - -class RequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler): - def address_string(self): - return "fileserver" - - -@contextmanager -def serve_files(port, directory): - """Serve files from current directory statically in a subprocess.""" - site_server = subprocess.Popen(['python', '-m', __name__, - str(port), directory]) - try: - yield - finally: - site_server.terminate() - - -if __name__ == '__main__': - args = parser.parse_args() - os.chdir(args.directory) - server = ReusingTCPServer(("", args.port), RequestHandler) - server.serve_forever()