// ============================================================================ // G-WAN ApacheBench / Weighttp / HTTPerf wrapper http://gwan.ch/source/ab.c // See the benchmark tutorial: http://gwan.ch/en_apachebench_httperf.html // ---------------------------------------------------------------------------- // build: gcc -O2 ab.c -o abc -lpthread // // usage: // ./abc [SERVER_NAME] [FROM-TO:NBR[A]+STEPxITERATIONS] // ./abc .................................... help (missing URL) // ./abc gwan ............................... help (missing URL) // ./abc 127.0.0.1:8080/1.html .............. ok (without CPU/RAM statistics) // ./abc gwan 127.0.0.1:8080/1.html ......... ok (with CPU/RAM statistics) // ./abc [0-1000+10x3] 127.0.0.1:8080/1.html ok (concurrency range) // // SERVER_NAME: gwan, nginx, etc. (process name) // FROM : concurrency range start // TO : concurrency range end // NBR : number of requests per weighttp run // A : use HTTP Keep-Alives (optional) // STEP : concurrencies to skip in in range step // ITERATIONS : number of repetitions for each weighttp run // ---------------------------------------------------------------------------- // Dependency: weighttp from http://redmine.lighttpd.net/projects/weighttp/wiki // // ab.c will: // // 1) invoke Apache Benchmark (IBM), Weighttp (Lighttpd) or HTTPerf (HP) on // the [1-1000] concurrency range; // // 2) optionally collect CPU / RAM usage for the specified tested server: // "./abc gwan /", or "./abc nginx /" (will collect stats for all active // server instances, including all their processes and thread workers); // // 3) collect results in a CSV file suitable for LibreOffice charting: // http://www.documentfoundation.org/download/ // // Doing 1) and 2) in the same process reduces the overhead of using different // processes ('htop' and others consume a lot of CPU resources to report the // RAM / CPU resources usage because they do many things that we don't need): // // Client Requests per second CPU // 
----------- ------------------------------ ---------------- ----- // Concurrency min ave max user kernel RAM // ----------- ------- ------- ------- ------- ------- ----- // => 30, 496307, 507626, 522668, 1047, 5943, 2.18 // ... // // Besides controlling CPU/RAM resource usage, specifying the SERVER_NAME also // lets ab.c check if the server crashed and restarted thread/process workers // during the test (yes, some do that). // ---------------------------------------------------------------------------- // If you see this system message: // // "error: connect() failed: Cannot assign requested address (99)" // // Then the Linux kernel has exhausted the available TCP port numbers and the // client tests tool (AB, or weighttp) cannot establish any further connection. // // Edit the /etc/sysctl.conf file and add the lines below: // // # avoid TIME_WAIT states on localhost with high-concurrency tests // # "error: connect() failed: Cannot assign requested address (99)" // net.ipv4.tcp_tw_reuse = 1 // net.ipv4.tcp_tw_recycle = 1 // // Then reload this system configuration file with: sysctl /etc/sysctl.conf // (more kernel tweaks are available below, look for "/etc/sysctl.conf") // ============================================================================ // ab.c HISTORY: the major number is ab.c's years of existence, started in 2009 // // 5.10.7 changes: added the free system RAM value for each concurrency step as // some servers delegate the task to the OS kernel, which does // consume system RAM rather than the server application RAM // (of course, this measure is only valid when a test is made // on a system running no other task for the test duration). // // 5.10.5 changes: some server workers (either threads or processes) crash and // are restarted using different pids. Some servers also start // thread/process workers 'on-demand' during the test. 
To still // collect all the CPU/RAM usage statistics of these servers we // now check if the pids list established at startup is still // relevant or needs a refresh. We now display the final count // of threads and processes if it has changed, as well as the // number of relaunches. // // 5.9.11 changes: replaced "ps -C" by "ps -A | grep -i" for the altered server // names that escaped ab.c's RAM & CPU resources collection and // reworked the code accordingly. // // 2.10.2 changes: prints sum of user/kernel CPU time, signals weighttp errors, // replaced "pidof " with "ps -C" for not found single-process. // // 2.9.26 changes: collects and logs all server's workers CPU and memory usage // (use: "ab gwan", or "ab nginx" to enable this feature). // // 2.4.20 changes: detect & report open (ab.txt output) file permission errors. // // 2.1.20 changes: added support for HTTPerf and Weighttpd as alternatives to // ApacheBench (Weighttpd is multithreaded and more desirable). // // v1.0.6 changes: corrected 64-bit platform issues and added support for gzip, // dumped a non-2xx reply on stderr for further investigations. // // v1.0.5 changes: added support for non-2xx response codes and trailing stats. // // v1.0.4 changes: initial release to test the whole 1-1,000 concurrency range. // ApacheBench only tests a given concurreny level and it does // not fully represent the capabilities of a server, nor it can // avoid the peaks (jitter) than ab.c prevents by using several // rounds for each concurrency level. // ---------------------------------------------------------------------------- // This program, written by TrustLeap.ch, is left in the public domain. 
// ============================================================================ // Select your benchmarking tool below: // //#define IBM_APACHEBENCH // single-thread, made better by Zeus' author //#define HP_HTTPERF // single-thread, from HP, less practical than AB #define LIGHTY_WEIGHTTP // multi-thread, made by the Lighttpd Team // faster than AB (same user interface) //http://redmine.lighttpd.net/projects/weighttp/wiki #define TRACK_ERRORS // signals HTTP errors (weighttp only) #ifdef IBM_APACHEBENCH # define CLI_NAME "ab" #elif defined HP_HTTPERF # define CLI_NAME "httperf" #elif defined LIGHTY_WEIGHTTP # define CLI_NAME "weighttp" #endif // ============================================================================ // Many server scripts in ANSI C, C#, Java and PHP are available from: // http://gwan.ch/source/ // ---- Static file ----------------------------------------------------------- // URL: "/100.html" // ---- Scala Lift ------------------------------------------------------------ // URL: "/static/100.html" // ---- Play/Scala ------------------------------------------------------------ // URL: "/hello?name=Pierre&repeat=1&color=" // ---- G-WAN/C --------------------------------------------------------------- // URL: "/?hello" // URL: "/?loan&name=Eva&amount=10000&rate=3.5&term=1" // URL: "/?loan&name=Eva&amount=10000&rate=3.5&term=10" // URL: "/?loan&name=Eva&amount=10000&rate=3.5&term=100" // URL: "/?loan&name=Eva&amount=10000&rate=3.5&term=800" // ---- Apache/PHP ------------------------------------------------------------ // URL: "/hello.php" // URL: "/loan.php?name=Eva&amount=10000&rate=3.5&term=1" // URL: "/loan.php?name=Eva&amount=10000&rate=3.5&term=10" // URL: "/loan.php?name=Eva&amount=10000&rate=3.5&term=100" // URL: "/loan.php?name=Eva&amount=10000&rate=3.5&term=800" // ---- GlassFish/Java ------------------------------------------------------- // URL: "/hello" // URL: "/loan/loan/loan.jsp?name=Eva&amount=10000&rate=3.5&term=1" // URL: 
"/loan/loan/loan.jsp?name=Eva&amount=10000&rate=3.5&term=10" // URL: "/loan/loan/loan.jsp?name=Eva&amount=10000&rate=3.5&term=100" // URL: "/loan/loan/loan.jsp?name=Eva&amount=10000&rate=3.5&term=800" // ---- IIS/ASP.Net C# -------------------------------------------------------- // URL: "/asp/hello.aspx"" // URL: "/asp/loan.aspx?name=Eva&amount=10000&rate=3.5&term=1" // URL: "/asp/loan.aspx?name=Eva&amount=10000&rate=3.5&term=10" // URL: "/asp/loan.aspx?name=Eva&amount=10000&rate=3.5&term=100" // URL: "/asp/loan.aspx?name=Eva&amount=10000&rate=3.5&term=800" // your locale settings will need to use a comma or a point for 'rate' // (using the wrong decimal separator will raise an exception in .Net) // URL: "/asp/loan.aspx?name=Eva&amount=10000&rate=3,5&term=1" // URL: "/asp/loan.aspx?name=Eva&amount=10000&rate=3,5&term=10" // URL: "/asp/loan.aspx?name=Eva&amount=10000&rate=3,5&term=100" // URL: "/asp/loan.aspx?name=Eva&amount=10000&rate=3,5&term=800" // ---------------------------------------------------------------------------- // Windows: // ---------------------------------------------------------------------------- // usage: define _WIN32 below and use a C compiler to compile and link a.c //#ifndef _WIN32 //# define _WIN32 //#endif #ifdef _WIN32 # pragma comment(lib, "ws2_32.lib") # define read(sock, buf, len) recv(sock, buf, len, 0) # define write(sock, buf, len) send(sock, buf, len, 0) # define close(sock) closesocket(sock) #endif // Unless you target a localhost test, don't use a Windows machine as // the client (to run ab) as the performances are really terrible (ab // does not use the 'IO completion ports' Windows proprietary APIs and // BSD socket calls are much slower under Windows than on Linux). // // G-WAN for Windows upgrades Registry system values to remove some // artificial limits (original values are just renamed), you need to // reboot after you run G-WAN for the first time to load those values. 
// Rebooting for each test has an effect on Windows (you are faster), // like testing after IIS 7.0 was tested (you are even faster), and // the Windows Vista 64-bit TCP/IP stack is 10% faster (for all) if // ASP.Net is *not* installed. // // Under Windows, run gwan like this: // // C:\gwan> gwan -b // // The -b flag (optional) disables G-WAN's denial of service shield, // this gives better raw performances (this is mandatory for tests // under Windows because the overhead of the Denial of Service Shield // is breaking the benchmarks). // ---------------------------------------------------------------------------- // Linux: // ---------------------------------------------------------------------------- // usage: ./gwan -r ab.c (a new instance of G-WAN will run this C source code) // // Linux Ubuntu 8.1 did not show significant boot-related side-effects // but here also I have had to tune the system (BOTH on the server and // client sides). ^^^^ // // The modification below works after a reboot (if an user is logged): // sudo gedit /etc/security/limits.conf // * soft nofile 200000 // * hard nofile 200000 // // If you are logged as 'root' in a terminal, type (instant effect): // ulimit -HSn 200000 // /* sudo gedit /etc/sysctl.conf # "Performance Scalability of a Multi-Core Web Server", Nov 2007 # Bryan Veal and Annie Foong, Intel Corporation, Page 4/10 fs.file-max = 5000000 net.core.netdev_max_backlog = 400000 net.core.optmem_max = 10000000 net.core.rmem_default = 10000000 net.core.rmem_max = 10000000 net.core.somaxconn = 100000 net.core.wmem_default = 10000000 net.core.wmem_max = 10000000 net.ipv4.conf.all.rp_filter = 1 net.ipv4.conf.default.rp_filter = 1 net.ipv4.tcp_congestion_control = bic net.ipv4.tcp_ecn = 0 net.ipv4.tcp_max syn backlog = 12000 net.ipv4.tcp_max tw buckets = 2000000 net.ipv4.tcp_mem = 30000000 30000000 30000000 net.ipv4.tcp_rmem = 30000000 30000000 30000000 net.ipv4.tcp_sack = 1 net.ipv4.tcp_syncookies = 0 net.ipv4.tcp_timestamps = 1 
net.ipv4.tcp_wmem = 30000000 30000000 30000000 # optionally, avoid TIME_WAIT states on localhost no-HTTP Keep-Alive tests: # "error: connect() failed: Cannot assign requested address (99)" # On Linux, the 2MSL time is hardcoded to 60 seconds in /include/net/tcp.h: # #define TCP_TIMEWAIT_LEN (60*HZ) # The option below lets you reduce TIME_WAITs by several orders of magnitude # but this option is for benchmarks, NOT for production servers (NAT issues) net.ipv4.tcp_tw_recycle = 1 */ // # other settings found from various sources // fs.file-max = 200000 // net.ipv4.ip_local_port_range = 1024 65535 // net.ipv4.ip_forward = 0 // net.ipv4.conf.default.rp_filter = 1 // net.core.rmem_max = 262143 // net.core.rmem_default = 262143 // net.core.netdev_max_backlog = 32768 // net.core.somaxconn = 2048 // net.ipv4.tcp_rmem = 4096 131072 262143 // net.ipv4.tcp_wmem = 4096 131072 262143 // net.ipv4.tcp_sack = 0 // net.ipv4.tcp_dsack = 0 // net.ipv4.tcp_fack = 0 // net.ipv4.tcp_fin_timeout = 30 // net.ipv4.tcp_orphan_retries = 0 // net.ipv4.tcp_keepalive_time = 120 // net.ipv4.tcp_keepalive_probes = 3 // net.ipv4.tcp_keepalive_intvl = 10 // net.ipv4.tcp_retries2 = 15 // net.ipv4.tcp_retries1 = 3 // net.ipv4.tcp_synack_retries = 5 // net.ipv4.tcp_syn_retries = 5 // net.ipv4.tcp_timestamps = 0 // net.ipv4.tcp_max_tw_buckets = 32768 // net.ipv4.tcp_moderate_rcvbuf = 1 // kernel.sysrq = 0 // kernel.shmmax = 67108864 // // Use 'sudo sysctl -p /etc/sysctl.conf' to update your environment // -the command must be typed in each open terminal for the changes // to take place (same effect as a reboot). // // As I was not able to make the 'open files limit' persist for G-WAN // after a reboot, G-WAN attemps to setup this to an 'optimal' value // depending on the amount of RAM available on your system: // // fd_max = (256 * (totalram / 4) < 200000) ? 
//                                                     256 * (totalram / 4)
//                                                   : 1000000;
//
// For this to work, you have to run gwan as 'root':
//
//   # ./gwan
// or
//   $ sudo ./gwan
// ----------------------------------------------------------------------------
// NB: on a 1 GbE LAN and for the 100.html test, this test was up
//     to 2x faster when client and server were using Linux 64-bit
//     (instead of Linux 32-bit) but absolute performances are less
//     relevant than relative server performances for me, hence the
//     localhost test.
//
//     Experiments demonstrate that, for a 100-byte static file, IIS
//     and Apache use 90-100% of a 4-Core CPU at high concurrencies
//     while being much slower than G-WAN (which uses "0%" of the CPU
//     on a gigabit LAN).
//
//     A low CPU usage matters because leaving free CPU resources
//     available for other tasks allows G-WAN to:
//
//     - achieve better performances by not starving the system;
//     - make room to generate dynamic contents (C servlets);
//     - make room for a database, proxy, email or virtual server;
//     - save energy (CPUs consume more energy under high loads);
//     - save money (doing 20-200,000x more on each of your servers).
//
//     For a small static file such as the 100.html file, if your test
//     on a LAN is slower than on localhost then your environment is
//     the bottleneck (NICs, switch, client CPU, client OS...).
// ============================================================================ #include #include #include #include #include #ifdef _WIN32 # include # include # include typedef unsigned char u8; typedef unsigned int u32; typedef unsigned __int64 u64; # define FMTU64 "I64u" #else # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include typedef unsigned char u8; typedef unsigned int u32; typedef signed long long s64; typedef unsigned long long u64; # define FMTU64 "llu" // ---------------------------------------------------------------------------- // gloabl variables // ---------------------------------------------------------------------------- volatile int ab_done = 0; // only main() thread can write it static float sys_used_ram = 0; // set at startup, once static u64 pid_ram(u32 pid); static void sys_ram(s64 *free, s64 *total, s64 *buffers, s64 *cached); // ---------------------------------------------------------------------------- // making sure we can get all the pids of any given process name: // // ps -eL | grep -i 'gwan' // // ppid pid pid name // ---- ---- -------------- // 3793 3793 ? 00:00:07 gwan // process // 3793 3794 ? 00:07:41 gwan_1 // worker threads // 3793 3795 ? 00:07:41 gwan_2 // 3793 3796 ? 00:07:41 gwan_3 // ... 
// ---------------------------------------------------------------------------- // find all pids of all processes/threads containing 'name' // (process pids are stored as negative integers to distinguish them from // thread pids) // ---------------------------------------------------------------------------- static int pidsof(char *name, int **pids) { if(!name || !*name || !pids) return 0; char str[4096]; sprintf(str, "ps -eL | grep -i '%s'", name); FILE *f = popen(str, "r"); if(!f) return 0; *str = 0; int len = fread(str, 1, sizeof(str) - 1, f); pclose(f); if(!len) return 0; int *n = *pids = (int*)malloc(sizeof(int) * 512), nbr_pids = 0, pid = 0, ppid = 0; char *p = str, *pp, *e; while(*p) { while(*p && *p == ' ') p++; // pass blanks pp = p; while(*p && *p != ' ') p++; // pass parent pid & close string *p++ = 0; // close string ppid = atoi(pp); // get parent pid if(!ppid) break; // done e = p; // point to the child pid while(*e && *e == ' ') e++; // pass child pid while(*e && *e != ' ') e++; if(*e == ' ') *e = 0; // close child pid string // if parent pid == child pid then that's a process pid // (else that's a thread pid) const int pid = atoi(p); const int thread = (ppid != pid); n[nbr_pids++] = pid * (thread ? 
1 : -1); //printf("%d] ppid:%d pid:%d\n", nbr_pids -1, ppid, pid); p = e + 1; while(*p != '\n') p++; // pass the rest of the line if(*p) p++; //printf("pid[%d]: %d\n", nbr_pids -1, n[nbr_pids -1]); } *pids = (int*)realloc(*pids, sizeof(int) * nbr_pids); return nbr_pids; } // ---------------------------------------------------------------------------- // wait 'n' milliseconds // ---------------------------------------------------------------------------- static void msdelay(u32 milisec) { struct timespec req; time_t sec = (u32)(milisec / 1000); milisec = milisec - (sec * 1000); req.tv_sec = sec; req.tv_nsec = milisec * 1000000L; while(nanosleep(&req, &req) == -1) continue; } // ---------------------------------------------------------------------------- // convert a string into an integer // ---------------------------------------------------------------------------- static u64 atou64(const u8 *s) { u64 v = 0, c; while(*s == ' ' || *s == '\t') *s++; while((u64)(c = *s++ - '0') < 10u) v = v * 10 + c; return v; } // ---------------------------------------------------------------------------- static int is_digit(char c) { return ((u8)c - '0') < 10u; } // ---------------------------------------------------------------------------- /* convert an integer into a string // ---------------------------------------------------------------------------- static char *u64toa(u64 v, char *s) { char *p = s; int offset = v; do p++, offset = offset / 10; while(offset); *p = 0; do *p-- = "0123456789"[v % 10], v = v / 10; while(v); return s; }*/ // ---------------------------------------------------------------------------- // "16:14:07" // HH:MM:SS (not thread-safe but we don't care here) // ---------------------------------------------------------------------------- static char *tm_now(void) { static char str[16] = {0}; time_t ltime = time(NULL); struct tm *tm = localtime(<ime); str[0] = '0' + (tm->tm_hour > 9 ? tm->tm_hour / 10: 0); str[1] = '0' + (tm->tm_hour > 9 ? 
tm->tm_hour % 10: tm->tm_hour); str[2] = ':'; str[3] = '0' + (tm->tm_min > 9 ? tm->tm_min / 10: 0); str[4] = '0' + (tm->tm_min > 9 ? tm->tm_min % 10: tm->tm_min); str[5] = ':'; str[6] = '0' + (tm->tm_sec > 9 ? tm->tm_sec / 10: 0); str[7] = '0' + (tm->tm_sec > 9 ? tm->tm_sec % 10: tm->tm_sec); str[8] = 0; return str; } // ---------------------------------------------------------------------------- // collect the CPU and RAM resources consumed by the server threads/processes, // only one time per second, WHILE the client test tool is running (doing this // AFTER the client did its job would miss the server RAM & CPU resources load) // ---------------------------------------------------------------------------- // Total server CPU usage = for_all_pids(utime + stime + cutime + cstime) // // $ man 5 proc // // utime %lu // Amount of time that this process has been scheduled in user mode, measured // in clock ticks (divide by sysconf(_SC_CLK_TCK). This includes guest time, // guest_time (time spent running a virtual CPU, see below), so that // applications that are not aware of the guest time field do not lose that // time from their calculations. // // stime %lu // Amount of time that this process has been scheduled in kernel mode, // measured in clock ticks (divide by sysconf(_SC_CLK_TCK). // // cutime %ld // Amount of time that this process's waited-for children have been scheduled // in user mode, measured in clock ticks (divide by sysconf(_SC_CLK_TCK). // (See also times(2).) This includes guest time, cguest_time (time spent // running a virtual CPU, see below). // // cstime %ld // Amount of time that this process's waited-for children have been scheduled // in kernel mode, measured in clock ticks (divide by sysconf(_SC_CLK_TCK). 
// ---------------------------------------------------------------------------- #ifndef _WIN32 typedef struct { s64 user, kernel; } icpu_t; #endif typedef struct { char *cpu_ram_buf; int nbr_pids, *pids; icpu_t *old_cpu; } res_args_t; static void th_resources(void *ptr) { res_args_t *arg = (res_args_t*)ptr; char *cpu_ram_buf = arg->cpu_ram_buf; int nbr_pids = arg->nbr_pids; int *pids = arg->pids; icpu_t *old_cpu = arg->old_cpu; //int count = 0; *cpu_ram_buf = 0; char str[32], buffer[1024]; FILE *f; msdelay(100); // give time for AB to warm-up the server s64 mem = 0, max_mem = 0; // in Bytes float max_sys_mem = 0; // in MBs icpu_t cpu = {0, 0}; // ------------------------------------------------------------------------- // this first loop is for the time spent by the client tool to work // ------------------------------------------------------------------------- int loop = 80; // 100 + (80 * 10 ms) < 1 second (length of the each ab shot) while(loop-- && !ab_done) // loop to track the (varying) RAM usage { int i = nbr_pids; while(i-- && !ab_done) // for each server worker thread / process { // we have to sum the memory usage of all processes but // we don't do that for threads (they don't have a memory // usage by their own: the sbrk pointer marking the end of // the addressable memory area is process-wide and shared // by all threads) if(pids[i] > 0) continue; // ignore thread pids const int pid = abs(pids[i]); // get the memory footprint of all process pids sprintf(str, "/proc/%u/stat", pid); f = fopen(str, "r"); if(!f) continue; const size_t len = fread(buffer, 1, sizeof(buffer) - 1, f); fclose(f); if(len <= 0) continue; buffer[len] = 0; /* $man proc 0: pid %d process ID comm %s executable filename, in parentheses state %c R:run, S:sleep, D:wait, Z:zombie, T:traced, W:paging ppid %d parent's PID pgrp %d process' group ID session %d process' session ID tty_nr %d tty used by the process 7: tpgid %d parent 'terminal' process' group ID flags %lu process flags (math 
bit: 4d, traced bit: 10d) minflt %lu minor faults that did not load a page from disk cminflt %lu minor faults that the process + children made majflt %lu major faults that loaded a page from disk cmajflt %lu major faults that process + children made 13: utime %lu jiffies that process has spent in user mode stime %lu jiffies that process has spent in kernel mode 15: cutime %ld jiffies that process + children have spent in user mode cstime %ld jiffies that process + children have spent in kernel mode priority %ld standard nice value, plus fifteen (never negative) nice %ld nice value ranges from 19 (nicest) to -19 (not nice) threads %ld Number of threads in this process (since Linux 2.6) intvaltm %ld jiffies before next SIGALRM sent due to an interval timer starttm %lu jiffies the process started after system boot vsize %lu virtual memory size in bytes 23: rss %ld nbr of pages the process has in real memory */ // puts(buffer); exit(0); 7 // pid cmdln st ppid pgrp ssid ttynr tpgid flags minflt... 
// ----- ----- - ---- ----- ---- ----- ----- ------- ---- // 0: // "10510 (gwan) S 2861 10383 2861 34818 10383 4202560 3256 64533 // 0 0 // 13: // 5587 23839 187 15 20 0 7 0 3048896 1764040704 // 23: // 2324 18446744073709551615 1048576 1247500 140736402151728 // 140510228127776 140510359544675 0 0 1073745920 575214 // 18446744073709551615 0 0 -1 6 char *p = buffer; // the 'pid' field p = strchr(p, ')') + 2; // find the 'state' field // D: waiting in uninterruptible disk sleep // R: running // S: sleeping in an interruptible wait // T: traced or stopped (on a signal) // W: paging // X: dead // Z: zombie //printf("pid: %d status: %c\n", abs(pids[i]), *p); if(*p >= 'D' && *p <= 'W') // track a [R]unning process { p += 2; // skip 'state' (we now point to 'ppid') // pass spaces to skip unused variables int n = 20; while(n) if(*p++ == ' ') n--; //printf("\nrss: %s\n", p); // get the physical memory used by this PROCESS mem += atou64((u8*)p) << 12llu; // convert 4096-byte pages into bytes //printf("\n[%d] phys:%.8s mem:%llu\n", pid, p, mem); } // measure the system RAM that the server may use indirectly via // kernel syscalls, caches, etc. 
{ s64 sys_free_ram = 0, sys_total_ram = 0, sys_buff_ram = 0, sys_cach_ram = 0; sys_ram(&sys_free_ram, &sys_total_ram, &sys_buff_ram, &sys_cach_ram); float sys_mem = (sys_total_ram + sys_buff_ram + sys_cach_ram - sys_free_ram) - sys_used_ram; if(sys_mem > max_sys_mem) max_sys_mem = sys_mem; } } // while(i-- && !ab_done) // (pids loop) // we only keep the highest value found during this pass if(mem > max_mem) max_mem = mem; mem = 0; msdelay(10); // take another measure after a small pause } // ------------------------------------------------------------------------ // now the client test tool has done its job, get the (always increasing) // CPU time (in "jiffies") // ------------------------------------------------------------------------ int i = nbr_pids; while(i--) // for each server worker process { // ignore thread pids (utime/stime is per-process and identical for // all threads at any given time) if(pids[i] > 0) continue; const int pid = abs(pids[i]); s64 new_cpu_user = 0, new_cpu_system = 0; sprintf(str, "/proc/%u/stat", pid); f = fopen(str, "r"); if(!f) continue; const size_t len = fread(buffer, 1, sizeof(buffer) - 1, f); fclose(f); if(len <= 0) continue; buffer[len] = 0; char *p = strchr(buffer, ')') + 2; // find the 'state' field if(*p >= 'D' && *p <= 'W') // track a [R]unning process { p += 2; // skip 'state' (we now point to 'ppid') // pass spaces to skip unused variables int n = 10; while(n) if(*p++ == ' ') n--; // now we point on 'utime' //printf("\nbuf:%s\n\nutime:%s\n\n", buffer, p); //#define TRACE_CPU_USAGE #ifdef TRACE_CPU_USAGE // the slow way (to trace/debug): { char comm[256], state; int pid, ppid, pgrp, session, tty, tpgid, flags; ulong minflt, cminflt, majflt, cmajflt, utime, stime, cutime, cstime, counter, priority, timeout, itrealvalue, starttime, vsize, rss, rlim; sscanf(buffer, "%d (%[^)]) %c %d %d %d %d %d " "%u %lu %lu %lu %lu %lu %lu %ld " "%ld %ld %ld %lu %lu %ld %lu %lu " "%lu", &pid, comm, &state, &ppid, &pgrp, &session, &tty, 
&tpgid, &flags, &minflt, &cminflt, &majflt, &cmajflt, &utime, &stime, &cutime, &cstime, &counter, &priority, &timeout, &itrealvalue, &starttime, &vsize, &rss, &rlim); printf( "pid : %d\n" /* "comm : %s\n" "state : %c\n" "ppid : %d\n" "pgrp : %d\n" "session : %d\n" "tty : %d\n" "tpgid : %d\n" "flags : 0x%x\n" // flags %u (%lu before Linux 2.6.22) "minflt : %lu minor faults\n" "cminflt : %lu cumulated minor faults\n" "majflt : %lu minor faults\n" "cmajflt : %lu cumulated major faults\n" */ "utime : %ld user time jiffies\n" "stime : %ld system time jiffies\n" "cutime : %ld cumulated user time jiffies\n" "cstime : %ld cumulated system time jiffies\n" /* "priority : %ld -2:low priority to -100:high priority\n" "nice : %ld 19:low priority to -20:high priority\n" "nb_threads : %ld threads in this process (since Linux 2.6)\n" "itrealvalue : %lu jiffies before SIGALARM\n" "starttime : %lld jiffies (system uptime at start time)\n" "vsize : %lu bytes in virtual memory\n" "rss : %lu bytes in real memory\n" "rlim : %lu bytes (RSS soft limit)\n" "startcode : 0x%lx\n" "encode : 0x%lx\n" "startstack : 0x%lx\n" "kstkesp : 0x%lx stack ESP\n" "kstkeip : 0x%lx stack EIP\n" "signal : 0x%lx pending signals mask\n" "blocked : 0x%lx blocked signals mask\n" "sigignore : 0x%lx ignored signals mask\n" "sigcatch : 0x%lx caught signals mask\n" "wchan : 0x%lx waiting channel address\n" "nswap : %lu Number of pages swapped (not maintained)\n" "cnswap : %lu Cumulative nswap for child processes\n" "exit_signal : %d Signal to be sent to parent when we die\n" "processor : %d CPU number last executed on\n" "rt_priority : %u (%u Linux 2.5.19+; was %lu)\n" "policy : %u (%u Linux 2.5.19+; was %lu)\n" "IO_ticks : %llu Aggregated block I/O delays\n" "guest_time : %lu (since Linux 2.6.24)\n" "cguest_time : %ld (since Linux 2.6.24)\n" */ "\n", pid, /* comm, state, ppid, pgrp, session, tty, tpgid, flags, minflt, cminflt, majflt, cmajflt, */ utime, stime, cutime, cstime /* , counter, priority, timeout, 
itrealvalue, starttime, vsize, rss, rlim, startcode, endcode, startstack, kstkesp, kstkeip, signal, blocked, sigignore, sigcatch, wchan */); int static ccc = 0; if(ccc++ > 10) exit(0); } #endif // TRACE_CPU_USAGE // get 'utime' and 'stime' (CPU time consumed by process) new_cpu_user = atou64((u8*)p); while(*p != ' ') p++; p++; new_cpu_system = atou64((u8*)p); while(*p != ' ') p++; p++; // add 'cutime' and 'cstime' (CPU time consumed by children) new_cpu_user += atou64((u8*)p); while(*p != ' ') p++; p++; new_cpu_system += atou64((u8*)p); // CPU time slice consumed by all server worker processes cpu.user += new_cpu_user - old_cpu[i].user; cpu.kernel += new_cpu_system - old_cpu[i].kernel; /* printf("[%d]user old:%7lld + step:%7lld = new:%7lld (total:%7lld)\n", i, old_cpu[i].user, new_cpu_user - old_cpu[i].user, new_cpu_user, cpu.user); */ // save elapsed CPU time for next pass to start from this point old_cpu[i].user = new_cpu_user; old_cpu[i].kernel = new_cpu_system; } // if not zombie } // loop pids /* format cumulated results (user/kernel proportion) const double total = (cpu.user + cpu.kernel) / 100.; sprintf(cpu_ram_buf, "%7.02f, %7.02f, %6.02f,", // User, Kernel, MB RAM (cpu.user / total),// / nbr_cpu, // "System load" (cpu.kernel / total),// / nbr_cpu, // "System load" max_mem / (1024. * 1024.)); */ // format cumulated results (user/kernel amounts) sprintf(cpu_ram_buf, "%7lld, %7lld, %6.02f, %6.01f,", // User, Kernel, RAM cpu.user, cpu.kernel, max_mem / (1024. * 1024.), max_sys_mem / (1024. 
* 1024.)); // difference since test start //printf("cpu_ram_buf[%d]:%s\n", (int)strlen(cpu_ram_buf), cpu_ram_buf); } // ---------------------------------------------------------------------------- // invoke a command and fetch its output // ---------------------------------------------------------------------------- static int run_cmd(char *cmd, char *buf, int buflen) { FILE *f = popen(cmd, "r"); if(!f) { perror("!run_cmd():"); return 0; } *buf = 0; int len = fread(buf, 1, buflen, f); pclose(f); if(!*buf) return 0; buf[len] = 0; return len; } // ------------------------------------ // just a wrapper for the code above // ------------------------------------ typedef struct { char *cmd, *buf; u32 buflen; } run_cmd_t; void th_run_cmd(void *ptr) { run_cmd_t *arg = (run_cmd_t*)ptr; long len = run_cmd(arg->cmd, arg->buf, arg->buflen); pthread_exit((void*)len); } // ---------------------------------------------------------------------------- // return the file PATH of process pid // (needs 'root' privileges for 'root' processes) // ---------------------------------------------------------------------------- char *pid_path(u32 pid, char *path, int pathlen) { char str[32]; snprintf(str, sizeof(str) - 1, "/proc/%u/exe", pid); const int res = readlink(str, path, pathlen); if(res < 0) { *path = 0; //perror("pid_path(): "); // "Permission denied" } return path; } // ---------------------------------------------------------------------------- // return the version of a server (providing it supports "server -v") // ---------------------------------------------------------------------------- // gwan -v => "\nG-WAN 2.9.16 (Sep 16 2011 13:11:41)" // nginx -v => "nginx: nginx version: nginx/1.0.6" // ---------------------------------------------------------------------------- char *srv_ver(char *SERVER_NAME, char *version, int verlen) { char cmd[256]; sprintf(cmd, "%s -v 2>&1", SERVER_NAME); *version = 0; run_cmd(cmd, version, verlen); if(*version) { char *p = version; // pass 
padding while(*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') p++; version = p; // keep only first line of text while(*p && *p != '\n') p++; if(*p) *p = 0; // erase last '\n' return version; } return version; } // ---------------------------------------------------------------------------- // return the physical RAM used by process 'pid' // ---------------------------------------------------------------------------- static u64 pid_ram(u32 pid) { char str[32]; sprintf(str, "/proc/%u/statm", pid); FILE *f = fopen(str, "r"); if(f) { unsigned long virt = 0, phys = 0; int len = fscanf(f, "%lu %lu", &virt, &phys); fclose(f); return (u64)phys << 12llu; // convert 4096-byte pages into bytes } return 0; } // ---------------------------------------------------------------------------- // return the free/used physical RAM of the System // ---------------------------------------------------------------------------- // "Buffers": (small) short-lived RAM cache for FS metadata (name, attribute) // "Cached" : RAM used to cache file blocks for I/O (shrinks under RAM pressure) // // cat /proc/meminfo // MemTotal: 8163924 kB // MemFree: 314752 kB // Buffers: 67316 kB // Cached: 6228024 kB // ... 
// each non-NULL argument receives the matching /proc/meminfo value, in the
// unit used by that file (kB); an argument is only filled when it holds 0 on
// entry, so callers must zero their variables before the call
static void sys_ram(s64 *free, s64 *total, s64 *buffers, s64 *cached)
{
   // number of values still to be found
   int todo = (free != 0) + (total != 0) + (buffers != 0) + (cached != 0);

   FILE *f = fopen("/proc/meminfo", "r");
   if(!f)
      return;

   char buf[80];
   while(todo && fgets(buf, sizeof(buf), f))
   {
      // strncmp() stops at the end of short lines (memcmp() would compare
      // bytes of buf[] that fgets() did not write)
      if(total && !*total && !strncmp(buf, "MemTotal:", 9))
         *total = atol(buf + 10), todo--;
      else
      if(free && !*free && !strncmp(buf, "MemFree:", 8))
         *free = atol(buf + 9), todo--;
      else
      if(buffers && !*buffers && !strncmp(buf, "Buffers:", 8))
         *buffers = atol(buf + 9), todo--;
      else
      if(cached && !*cached && !strncmp(buf, "Cached:", 7))
         *cached = atol(buf + 8), todo--;
   }

   fclose(f); // was never closed: one file descriptor leaked per call
}
// ----------------------------------------------------------------------------
// print the number and type of CPUs and Cores, according to the Linux kernel
// ----------------------------------------------------------------------------
// Note: with AMD CPUs recent Linux releases are reporting only half of the
// CPU Cores actually present. As a result, G-WAN may look like twice as
// slow (G-WAN can't reply to more requests per second than sent to it).
// The workaround used in G-WAN is to run the CPUID instruction to fetch
// the right number of Cores, also on AMD CPUs (which use different sets
// of codes and return values than Intel CPUs, so that's different code).
// This can't reasonably be done for this weighttpd wrapper as the AMD
// CPUID detection is too long and relies on assembly language.
// So, here we just check the Ubuntu release number and double the number
// of CPU Cores when needed (see below the comment in the code).
// ---------------------------------------------------------------------------- static int cpu_type(FILE *fo) { int nbr_cpu = 0, phys_cpu_id = -1, nbr_cores = 0; char buffer[1024], model[80] = {0}; FILE *f = fopen("/proc/cpuinfo", "r"); if(f) { while(fgets(buffer, sizeof(buffer), f)) { if(!strncmp(buffer, "processor\t:", 11)) nbr_cpu++; else if(!strncmp(buffer, "physical id\t:", 13)) { int id = atoi(buffer + 14); if(id > phys_cpu_id) phys_cpu_id = id; } else if(!*model && !strncmp(buffer, "model name\t:", 12)) { char *s = buffer + 13, *d = model; while(*s) { *d++ = *s; if(*s++ == ' ') // copy string removing consecutive spaces { while(*s == ' ') s++; *d++ = *s++; } } } if(!nbr_cores && !strncmp(buffer, "cpu cores\t:", 11)) nbr_cores = atoi(buffer + 12); } fclose(f); } // extra step for Linux kernel v3+ reporting half the Cores of AMD CPUs char os[1024]; int len = run_cmd("uname -a", os, sizeof(os)); if(len) { if(strstr(model, "AMD") && strstr(os, " 3.")) { fprintf(stdout, "=> Nbr-CPU-Cores x2 for Linux kernel v3+ on AMD platforms\n"); nbr_cores *= 2; } } if(nbr_cores > 0) nbr_cpu = nbr_cores; fprintf(stdout, "CPU: %d x %u-Core CPU(s) %s", phys_cpu_id >= 0 ? phys_cpu_id + 1 : 1, nbr_cores, model); fprintf(fo, "CPU: %d x %u-Core CPU(s) %s", phys_cpu_id >= 0 ? phys_cpu_id + 1 : 1, nbr_cores, model); return nbr_cpu << 16 | (nbr_cores ? nbr_cores : 1); // never 0 please! } // ---------------------------------------------------------------------------- void dump_srv_topology(char *buf, FILE *fo, char *SERVER_NAME, int *process_pid, int nbr_pids, int *pids) { sprintf(buf, "> Server '%s' process topology:\n" "---------------------------------------------\n", SERVER_NAME); fputs(buf, fo); fprintf(stdout, "%s", buf); float mem_sum = 0; int i = nbr_pids; while(i--) { const int pid = abs(pids[i]); char *p = buf + sprintf(buf, "%3d] pid:%d ", i, pid); if(pids[i] < 0) // list RAM for processes { float mem = (float)pid_ram(pid) / (1024. 
* 1024.); mem_sum += mem; sprintf(p, "Process RAM: %.02f MB\n", mem); *process_pid = pid; } else strcpy(p, "Thread\n"); fputs(buf, fo); fprintf(stdout, "%s", buf); } sprintf(buf, "---------------------------------------------\n" "Total '%s' server footprint: %.02f MB", SERVER_NAME, mem_sum); puts(buf); fprintf(fo, "%s\n", buf); } #endif // ---------------------------------------------------------------------------- // upgrade 'size' if string contains a 'K', or 'M' (Kilobytes or Megabytes) // ---------------------------------------------------------------------------- static u32 atou32k(const char *v) { char *p = (char*)v; u32 size = 0; for(; *p && size < UINT_MAX; p++) { switch(*p) // here we don't check that 'M' is the first/last character! { case '0' ... '9': size = (10 * size) + (*p - '0'); break; case 'K': size <<= 10; break; case 'M': size <<= 20; break; case 'k': size *= 1000; break; case 'm': size *= 1000000; break; default : return size; } } return size; } // ============================================================================ static int http_req(char *request, FILE *f); // avoid unecessary parameter-passing in helper routines // (IP[] is so large for large Domain names) char IP[80] = {0}, PORT[8] = ":80"; // default port is 80 char *SERVER_NAME = 0, *URL = 0, *RANGE = 0, KEEP_ALIVES_STR[] = "-k"; // default range and number of requests int FROM = 0, TO = 1000, STEP = 10, ITER = 3; int NBR_REQUESTS = 100000; int main(int argc, char *argv[]) { int i, j, nbr, max_rps, min_rps, ave_rps; char str[256], buf[4070], buffer[256], cpu_ram_buf[256] = {0}; time_t st = time(NULL); u64 tmax_rps = 0, tmin_rps = 0, tave_rps = 0; FILE *f; puts(" "); // ------------------------------------------------------------------------- // check program arguments // ------------------------------------------------------------------------- if(argc < 2) { help: fprintf(stderr, "usage:\n\n" "./abc [SERVER_NAME] [FROM-TO:NBR[A]+STEPxITERATIONS] " "\n\n" " SERVER_NAME: gwan, 
nginx, etc. (process name)\n" " FROM : concurrency range start\n" " TO : concurrency range end\n" " NBR : number of requests per weighttp run\n" " A : use HTTP Keep-Alives (optional)\n" " STEP : concurrencies to skip in in range step\n" " ITERATIONS : number of repetitions for each weighttp run\n\n" "examples:\n" " ./abc (show this help)\n" " ./abc 127.0.0.1/ (HOST+URI = URL)\n" " ./abc 127.0.0.1:8080/100.html (URL with PORT)\n" " ./abc gwan 127.0.0.1/100.html (SERVER and URL)\n" " ./abc gwan [0-1k:100k+10x3] 127.0.0.1/ (SERVER, RANGE, URL)\n\n" " (default range is: from 0 to 1000, step:10, 3 iterations,\n" " if a range is speficied, all its parameters must be set,\n" " and the order of the all the arguments must be respected)\n\n"); exit(1); } //printf("argc: %d\n", argc); if(argc == 2) // check it's a valid URL { URL = argv[1]; if(!strchr(URL, '.')) // not a domain name nor an 'IP' address { fprintf(stderr, "bad URL: %s\n\n", URL); goto help; } } // check that we have SERVER_NAME and a valid URL (and an optional range) if(argc >= 3) { if(argc == 3) // either (SERVER_NAME + URL) or (RANGE + URL) { if(argv[1][0] == '[') RANGE = argv[1], URL = argv[2]; else SERVER_NAME = argv[1], URL = argv[2]; } else if(argc == 4) // (SERVER_NAME + range + URL) { SERVER_NAME = argv[1]; RANGE = argv[2]; URL = argv[3]; } else { fputs("* too many arguments\n\n", stderr); goto help; } } // parse 'IP' and 'PORT' from 'URL' { char *p = strchr(URL, ':'); if(p) // found a port { strncpy(PORT, p, sizeof(PORT) - 1); char *q = strchr(PORT, '/'); if(q) *q = 0; *p++ = 0; // close 'IP' string and reach 'URI' strncpy(IP, URL, sizeof(IP) - 1); while(is_digit(*p)) p++; // pass PORT URL = p; } else // no port specified, use 80 as default port { p = strchr(URL, '/'); if(p - URL > sizeof(IP) - 1) { fprintf(stderr, "\n\nbad host: %s\n\n", p); goto help; } memcpy(IP, URL, p - URL); IP[p - URL] = 0; URL += p - URL; } } // parse the RANGE, if any while(RANGE) { char *pFROM = 0, *pTO = 0, *pNBR = 0, *pSTEP 
= 0, *pITER = 0; char *p = RANGE, *end = strchr(RANGE, ']'); pFROM = ++p; while(*p != '-' && p < end) p++; if(p == end) goto bad_range; pTO = ++p; while(*p != ':' && p < end) p++; if(p == end) goto bad_range; pNBR = ++p; while(*p != '+' && p < end) p++; if(p == end) goto bad_range; pSTEP = ++p; while(*p != 'x' && p < end) p++; if(p == end) goto bad_range; pITER = ++p; FROM = atou32k(pFROM); TO = atou32k(pTO); NBR_REQUESTS = atou32k(pNBR); STEP = atou32k(pSTEP); ITER = atou32k(pITER); // enable/disable HTTP keep-Alives if(pSTEP[-2] != 'A') *KEEP_ALIVES_STR = 0; printf("pSTEP[-2]: '%c'\n", pSTEP[-2]); break; bad_range: fputs("* bad range\n\n", stderr); goto help; } /* fprintf(stderr, "IP : %s\n", IP); fprintf(stderr, "PORT : %s\n", PORT); fprintf(stderr, "URL : %s\n", URL); fprintf(stderr, "SERVER: %s\n", SERVER_NAME); fprintf(stderr, "RANGE : %s\n", RANGE); fprintf(stderr, "FROM : %u\n", FROM); fprintf(stderr, "TO : %u\n", TO); fprintf(stderr, "NBR : %u\n", NBR_REQUESTS); fprintf(stderr, "KEEP-A: %u\n", *KEEP_ALIVES_STR != 0); fprintf(stderr, "STEP : %u\n", STEP); fprintf(stderr, "ITER : %u\n", ITER); fprintf(stderr, "\n"); exit(0); */ // ------------------------------------------------------------------------- // open an output file which name is built from the SERVER_NAME and the URL // ------------------------------------------------------------------------- FILE *fo; { char cleanURL[256], *q = cleanURL, *p = URL + 1, *e = p + MIN(sizeof(cleanURL) - 1, strlen(p)); while(p < e) { const u8 c = *p++; switch(c) // "A-Za-z0-9-._~:/?#[]@!$&'()*+,;=%" { #ifdef _WIN32 case '\\': // filter what breaks Windows fs case ':': case '*': case '?': case '\"': case '<': case '>': case '|': *q++ = '_'; break; #else case '/': *q++ = '!'; break; // filter what breaks Unix fs #endif default : *q++ = c; break; } } *q = 0; char filename[256]; // keep filenames readable... snprintf(filename, sizeof(filename) -1, "%s_%s%s.txt", SERVER_NAME ? 
SERVER_NAME : "x", cleanURL, RANGE); fo = fopen(filename, "w+b"); } if(!fo) { perror("can't open output file"); // "Permission denied" return 1; } { const char fmt[] = "==============================================================" "=================\n" "G-WAN ApacheBench / Weighttp / HTTPerf wrapper " "http://gwan.ch/source/ab.c\n" "--------------------------------------------------------------" "-----------------\nNow: %s"; time_t tm; time(&tm); struct tm *t = localtime(&tm); fprintf(fo, fmt, asctime(t)); fprintf(stdout, fmt, asctime(t)); } // ------------------------------------------------------------------------- // find CPU topology, RAM, OS release, etc. // ------------------------------------------------------------------------- #ifndef _WIN32 int nbr_cpu = cpu_type(fo), nbr_cores = nbr_cpu & 0x0000ffff; nbr_cpu >>= 16; { s64 sys_free_ram = 0, sys_total_ram = 0, sys_buff_ram = 0, sys_cach_ram = 0; sys_ram(&sys_free_ram, &sys_total_ram, &sys_buff_ram, &sys_cach_ram); if(sys_free_ram && sys_total_ram) { // keep it in Bytes here sys_used_ram = sys_total_ram + sys_buff_ram + sys_cach_ram - sys_free_ram; sprintf(buf, "RAM: %.02f/%.02f (Free/Total, in GB)\n", sys_free_ram / (1024 * 1024.), sys_total_ram / (1024 * 1024.)); fputs(buf, fo); fprintf(stdout, "%s", buf); } } { char name[256] = {0}; f = fopen("/etc/issue", "r"); if(f) { int len = fread(name, 1, sizeof(name) - 1, f); if(len > 0) { name[len] = 0; // just in case char *p = name; while(*p && !iscntrl(*p)) p++; *p = 0; } fclose(f); } struct utsname u; uname(&u); sprintf(buf, "OS : %s %s v%s %s\n %s\n", u.sysname, u.machine, u.version, u.release, name); fprintf(fo, "%s", buf); fprintf(stdout, "%s", buf); } { const char fmt[] = "abc: max open sockets: %ld\n\n"; const long fds = sysconf(_SC_OPEN_MAX); fprintf(fo, fmt, fds); fprintf(stdout, fmt, fds); } // ------------------------------------------------------------------------- // servers like Nginx implement workers with processes (others, like G-WAN // use 
threads), so we have to find all the possible process/thread mixes // ------------------------------------------------------------------------- icpu_t *old_cpu = 0, *beg_cpu = 0; int nbr_pids = 0, nbr_srv_launches = 1; int *pids = 0, process_pid = 0; if(SERVER_NAME) // any server process name provided on command line? { nbr_pids = pidsof(SERVER_NAME, &pids); if(!nbr_pids) { fprintf(stderr, "\nCan't find any process containing '%s'\n" "(Make sure the '%s' server is started)\n\n", SERVER_NAME, SERVER_NAME); exit(1); } old_cpu = (icpu_t*)calloc(nbr_pids, sizeof(icpu_t)), beg_cpu = (icpu_t*)calloc(nbr_pids, sizeof(icpu_t)); dump_srv_topology(buf, fo, SERVER_NAME, &process_pid, nbr_pids, pids); // try to get the server version ('root' privileges are required for // web servers run under the 'root' account) char srv_path[512]; pid_path(process_pid, srv_path, sizeof(srv_path)); if(srv_path && *srv_path) { fprintf(stdout, "%s\n", srv_path); fprintf(fo, "%s\n", srv_path); char version[1024]; char *v = srv_ver(srv_path, version, sizeof(version)); if(v && *v) { fprintf(stdout, "%s\n", v); fprintf(fo, "%s\n", v); } } // get the start count of CPU jiffies for this server res_args_t res_args = {cpu_ram_buf, nbr_pids, pids, beg_cpu}; th_resources(&res_args); } // ------------------------------------------------------------------------- // log the test configuration // ------------------------------------------------------------------------- { char str[4096]; snprintf(str, sizeof(str), "\n" CLI_NAME " -n %u -c [%u-%u step:%d rounds:%u] " #ifdef IBM_APACHEBENCH "-S -d " #endif #ifdef LIGHTY_WEIGHTTP "-t %u " #endif "%s " "\"http://%s%s%s\"\n\n", NBR_REQUESTS, FROM, TO, STEP, ITER, #ifdef LIGHTY_WEIGHTTP nbr_cores, #endif KEEP_ALIVES_STR, IP, PORT, URL); #endif fputs(str, fo); fputs(str, stdout); } // ------------------------------------------------------------------------- // check that a server is listening on the provided IP:PORT // 
------------------------------------------------------------------------- int ret = http_req(URL, fo); if(ret < 0) { fprintf(stderr, "\n * Can't find a server listening on '%s%s'\n" " (Make sure a server is listening there)\n\n", IP, PORT); exit(2); } else // some servers are really slow for 404, so it may make sense to if(ret == 404) // benchmark this - just make sure that's your intent... { fprintf(stderr, "\n * Warning: the resource '%s' is not found (404)\n" " (Make sure you want to test 404 replies)\n\n", URL); sleep(3); // let users read the warning before text is scrolling... } fprintf(stdout, "\n"); fprintf(fo, "\n"); // ------------------------------------------------------------------------- // test header // ------------------------------------------------------------------------- { const char head1[] = " Number Requests per second CPU RAM\n" " of ---------------------------- ---------------- --------------\n" "Clients min ave max user kernel SRV MB SYS MB Time\n" "-------- -------- -------- -------- ------- ------- ------ ------ --------\n"; const char head2[] = " Number Requests per second\n" " of ----------------------------\n" "Clients min ave max Time\n" "-------- -------- -------- -------- --------------\n"; const char *head = SERVER_NAME && nbr_pids ? head1 : head2; printf("%s", head); // avoid (a) CRLF, (b) GCC warning fputs(head, fo); fflush(stdout); } // ------------------------------------------------------------------------- // prepare the client command line // ------------------------------------------------------------------------- for(i = FROM; i <= TO; i += STEP) { #ifdef IBM_APACHEBENCH // ApacheBench makes it straight for you since you can directly tell // the 'concurrency' and 'duration' you wish: sprintf(str, "ab -n %u -c %d -S -d -t 1 %s " "-H \"Accept-Encoding: gzip\" " // HTTP compression "\"http://%s%s%s\"" #ifdef _WIN32 " > ab.txt" #endif , NBR_REQUESTS, i ? 
i : 1, KEEP_ALIVES_STR, IP, PORT, URL); #elif defined HP_HTTPERF // HTTPerf does not let you specify the 'concurrency'rate: // // rate : number of TCP connections per second // num-con : number of TCP connections // num-call: number of HTTP requests // // If we want 100,000 HTTP requests, we have to calculate how many // '--num-conn' and '--num-call' to specify for a given '--rate': // // nbr_req = rate * num-call // // 'num-conn' makes it last longer, but to get any given 'rate' // 'num-conn' must always be >= to 'rate' // // HTTPerf creates new connections grogressively and only collects // statistics after 5 seconds (to let servers 'warm-up' before they // are tested). This is NOT reflecting real-life situations where // clients send requests on short but intense bursts. // // Also, HTTPerf's looooong shots make the TIME_WAIT state become a // problem if you do any serious concurrency test. // // Finally, HTTPerf is unable to test client concurrency: if 'rate' // is 1 but num-conn is 2 and num-call is 100,000 then you are more // than likely to end with concurrent connections because not all // requests are processed when the second connection is launched. // // If you use a smaller num-call value then you are testing the TCP // /IP stack rather than the user-mode code of the server. // // As a result, HTTPerf can only be reliably used without Keep-Alives // (with num-call=1) // sprintf(str, "httperf --server=%s --port=%s " "--rate=%d " #ifdef KEEP_ALIVES "--num-conns=%u --num-calls 100000 " // KEEP-ALIVES #else "--num-conns=%u --num-calls 1 " // NO Keep_Alives #endif "--timeout 5 --hog --uri=\"%s\"" #ifdef _WIN32 " > ab.txt" #endif , IP, PORT, i ? i : 1, i ? i : 1, URL); #elif defined LIGHTY_WEIGHTTP sprintf(str, "weighttp -n %u -c %d -t %u %s " "-H \"Accept-Encoding: gzip\" " // HTTP compression "\"http://%s%s%s\"" // Weighttp rejects concurrency inferior to thread count: , NBR_REQUESTS, i > nbr_cores ? 
i : nbr_cores, nbr_cores, KEEP_ALIVES_STR, IP, PORT, URL); #endif // ---------------------------------------------------------------------- // test loop, running the client tool 'n' times // ---------------------------------------------------------------------- for(max_rps = 0, ave_rps = 0, min_rps = 0xffff0, j = 0; j < ITER; j++) { #ifdef _WIN32 // Windows needs to take its breath after system() calls (this is not // giving any advantage to Windows as all the tests have shown that // this OS platform is -by far- the slowest and less scalable of all) system(str); Sleep(4000); // get the information we need from res.txt if(!(f = fopen("ab.txt", "rb"))) { fprintf(stdout, "Can't open ab.txt output\n"); return 1; } //memset(buf, 0, sizeof(buf) - 1); *buf = 0; nbr = fread(buf, 1, sizeof(buf) - 1, f); if(nbr <= 0) { fprintf(stdout, "Can't read ab.txt output\n"); return 1; } fclose(f); #else // some server workers crash (and are restarted using a different pid) // or some servers start thread/process workers 'on-demand' during the // test so we have to check if the list of pids we established first // is still relevant if(SERVER_NAME) // if we were instructed to collect RAM/CPU usage { int *_pids = 0, process_pid = 0; int _nbr_pids = pidsof(SERVER_NAME, &_pids); if(!_nbr_pids) { if(_pids) free(_pids); if(pids) free(pids); pids = 0; nbr_pids = 0; if(old_cpu) free(old_cpu); old_cpu = 0; if(beg_cpu) free(beg_cpu); beg_cpu = 0; nbr_srv_launches++; } else if(nbr_pids != _nbr_pids || memcmp(_pids, pids, _nbr_pids * sizeof(int))) { // replace the old list by the new list // (here we could do better and compare the 2 lists pid by pid // to inherit from previously relevant statistics for a given // pid - that's left as an exercise for the reader...) 
if(pids) free(pids); pids = _pids; nbr_pids = _nbr_pids; if(old_cpu) free(old_cpu); if(beg_cpu) free(beg_cpu); old_cpu = (icpu_t*)calloc(nbr_pids, sizeof(icpu_t)), beg_cpu = (icpu_t*)calloc(nbr_pids, sizeof(icpu_t)); nbr_srv_launches++; } } // MUST be done in parallel to 'ab' because otherwise we check the // resources consumed by the server AFTER the 'ab' test is done if(nbr_pids) { ab_done = 0; run_cmd_t cmd_args = {.cmd = str, .buf = buf, .buflen = sizeof(buf)}; pthread_t th_ab; pthread_create(&th_ab, NULL, th_run_cmd, (void*)&cmd_args); res_args_t res_args = {cpu_ram_buf, nbr_pids, pids, old_cpu}; pthread_t th_res; pthread_create(&th_res, NULL, th_resources, (void*)&res_args); void *ret_code; pthread_join(th_ab, (void**)&ret_code); nbr = (long)ret_code; ab_done = 1; // one writer, several readers pthread_join(th_res, NULL); } else nbr = run_cmd(str, buf, sizeof(buf)); #endif if(nbr > 0 && nbr < sizeof(buf)) *(buf + nbr) = 0; nbr = 0; if(*buf) { // IIS 7.0 quickly stops serving loans and sends error 503 (Service // unavailable) at a relatively high rate. If we did not detect it // this would be interpreted as a 'boost' in performance while, in // fact, IIS is dying. Soon, IIS would really die and we would have // to reboot the host: a complete IIS stop/restart has no effect). // Other issues to catch here are error 30x (redirects) or 404 // (not found) on badly configured servers that make users report // that their application server is fast when this is not the case. 
#ifdef IBM_APACHEBENCH char *p = strstr(buf, "Non-2xx responses:"); if(p) // "Non-2xx responses: 50130" { char *n; p += sizeof("Non-2xx responses:"); while(*p == ' ' || *p == '\t') p++; n = p; while(*p >= '0' && *p <= '9') p++; *p = 0; nbr = atoi(n); if(nbr) { fprintf(stdout, "* Non-2xx responses:%d\n", nbr); fprintf(fo, "* Non-2xx responses:%d\n", nbr); // dump the server reply on stderr for examination http_req(URL, fo); goto end; } } p = strstr(buf, "Requests per second:"); if(p) // "Requests per second: 16270.00 [#/sec] (mean)" { char *n; p += sizeof("Requests per second:"); while(*p == ' ' || *p == '\t') p++; n = p; while(*p >= '0' && *p <= '9') p++; *p = 0; nbr = atoi(n); } else puts("* 'Requests per second' not found!"); #elif defined HP_HTTPERF char *p = strstr(buf, "Reply status:"); if(p) // "Reply status: 1xx=0 2xx=1000000 3xx=0 4xx=0 5xx=0" { char *n; p += sizeof("Reply status: 1xx=") - 1; // we are not interested in "1xx" errors if(*p == '0') // pass "2xx=" if no errors p = strstr(p, "3xx="); if(p && p[4] == '0') // pass "3xx=" if no errors p = strstr(p, "4xx="); if(p && p[4] == '0') // pass "4xx=" if no errors p = strstr(p, "5xx="); if(p && p[4] == '0') // pass "5xx=" if no errors goto no_errors; p+=sizeof("5xx="); while(*p == ' ' || *p == '\t') p++; n = p; while(*p >= '0' && *p <= '9') p++; *p = 0; nbr = atoi(n); if(nbr) { fprintf(stdout, "* Non-2xx responses:%d\n", nbr); fprintf(fo, "* Non-2xx responses:%d\n", nbr); // dump the server reply on stderr for examination http_req(URL, fo); goto end; } } no_errors: // Reply rate [replies/s]: min 163943.9 avg 166237.2 max 167482.3 // stddev 1060.4 (12 samples) p = strstr(buf, "Reply rate"); if(p) { char *n; p += sizeof("Reply rate [replies/s]: min"); while(*p == ' ' || *p == '\t') p++; n = p; while(*p >= '0' && *p <= '9') p++; *p++ = 0; p++; min_rps=atoi(n); while(*p<'0' || *p>'9') p++; // avg n=p; while(*p >= '0' && *p <= '9') p++; *p++ = 0; p++; ave_rps = atoi(n); while(*p < '0' || *p > '9') p++; // max 
n=p; while(*p >= '0' && *p <= '9') p++; *p++ = 0; p++; max_rps = atoi(n); } else puts("* 'Reply rate' not found!"); // HTTPerf needs so many more requests than AB that it quickly // exhausts the [1 - 65,535] port space. There is no obvious // solution other than using several HTTPerf workers OR waiting /* a bit between each shot to let the system evacuate the bloat: if(!strcmp(IP, "127.0.0.1")) { int nop = 60; fprintf(stdout, "waiting:"); fflush(stdout); while(nop--) { fprintf(stdout, "."); fflush(stdout); sleep(1); } fprintf(stdout, "\n"); fflush(stdout); }*/ goto round_done; #elif defined LIGHTY_WEIGHTTP char *p = strstr(buf, "microsec,"); // "microsec, 12345 req/s" if(p) { p += sizeof("microsec,"); nbr = atoi(p); #ifdef TRACK_ERRORS p = strstr(p, "succeeded,"); // "succeeded, 0 failed, 0 errored" u32 nbr_errors = 0; if(p) { p += sizeof("succeeded,"); nbr_errors = atoi(p); } if(nbr_errors) { fprintf(stdout, "* failed responses:%d\n", nbr); fprintf(fo, "* failed responses:%d\n", nbr); // dump the server reply on stderr for examination // (might not help: HTTP headers are fine, most of // the time the body is corrupted/incomplete/etc.) // //http_req(URL, fo); goto end; } #endif } //goto round_done; #endif } // if(nbr_pids) if(max_rps < nbr) max_rps = nbr; if(min_rps > nbr) min_rps = nbr; ave_rps += nbr; } //for(max_rps = 0, ave_rps = 0, min_rps = 0xffff0, j = 0; j < ITER; j++) ave_rps /= ITER; #ifdef HP_HTTPERF round_done: #endif tmin_rps += min_rps; tmax_rps += max_rps; tave_rps += ave_rps; // ---------------------------------------------------------------------- // display concurrency step data for convenience and save it on disk // ---------------------------------------------------------------------- nbr = sprintf(buf, "%7d, %8d, %8d, %8d, %s %s\n", i ? 
i : 1, min_rps, ave_rps, max_rps, cpu_ram_buf, tm_now()); fwrite(buf, 1, nbr, stdout); if(fwrite(buf, 1, nbr, fo) != nbr) { fprintf(stdout, "fwrite(fo) failed"); return 1; } fflush(fo); // in case we interrupt the test with Ctrl-C } // for(i = FROM; i <= TO; i += STEP) end: st = time(NULL) - st; strcpy(buf, "---------------------------------------------------------" "----------------------"); puts(buf); fputs(buf, fo); fputs("\n", fo); strftime(str, sizeof(str) - 1, "%X", gmtime(&st)); sprintf(buf, "min:%"FMTU64" avg:%"FMTU64" max:%"FMTU64 " Time:%ld second(s) [%s]", tmin_rps, tave_rps, tmax_rps, st, str); puts(buf); fputs(buf, fo); fputs("\n", fo); strcpy(buf, "---------------------------------------------------------" "----------------------\n"); puts(buf); fputs(buf, fo); if(SERVER_NAME) // any server process name provided on command line? { // print the total count of CPU jiffies for this server u64 user = 0, kernel = 0; int i = nbr_pids; while(i--) user += (old_cpu[i].user - beg_cpu[i].user), kernel += (old_cpu[i].kernel - beg_cpu[i].kernel); sprintf(buf, "CPU jiffies: user:%"FMTU64" kernel:%"FMTU64 " total:%"FMTU64, user, kernel, user + kernel); puts(buf); fputs(buf, fo); if(nbr_srv_launches > 1) { // print the number of server process/thread launches sprintf(buf, "\n\n'%s' relaunches (threads/processes): %d\n\n" "WARNING: partially reported CPU/RAM statistics!\n", SERVER_NAME, nbr_srv_launches); puts(buf); fputs(buf, fo); // show breakdown again for comparison with first detection //dump_srv_topology(buf, fo, SERVER_NAME, &process_pid, nbr_pids, pids); } } fputs(" ", fo); puts(" "); fclose(fo); return 0; } // ============================================================================ // A 'quick and (really) dirty' wget (don't use this code in production!) 
// ---------------------------------------------------------------------------- // read a CRLF-terminated line of text from the socket // return the number of bytes read, -1 if error // ---------------------------------------------------------------------------- static int read_line(int fd, char *buffer, int max) { char *p = buffer; while(max--) { if(read(fd, p, 1) <= 0) break; if(*p == '\r') continue; if(*p == '\n') break; p++; } *p = 0; return p - buffer; } // ---------------------------------------------------------------------------- // read 'len' bytes from the socket // return the number of bytes read, -1 if error // ---------------------------------------------------------------------------- static int read_len(int fd, char *buffer, int len) { int ret; char *p = buffer; while(len > 0) { ret = read(fd, p, len); if(ret <= 0) return -1; p += ret; len -= ret; } return p - buffer; } // ---------------------------------------------------------------------------- static void so_timeout(int fd, u32 milisecs) { int timeout = milisecs; setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char*)&timeout, sizeof(timeout)); setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, (char*)&timeout, sizeof(timeout)); } // ---------------------------------------------------------------------------- // connect to the server, send the HTTP request and dump the server reply // return the HTTP status sent by the server, -1 if error // ---------------------------------------------------------------------------- static int http_req(char *request, FILE *f) // fd: log file { char buf[4096]; int s, ret = -1; #ifdef _WIN32 WSADATA sa; WORD ver = MAKEWORD(2, 2); WSAStartup(ver, &sa); #endif int port = atoi(PORT + 1); // don't forget it's ":8080" int addr = inet_addr(IP); // convert string into integer address do { if(addr == INADDR_NONE) // is it a valid HOST address? 
{ // get the HOST address of the specified host name struct hostent phost, *hp; char tmp[1024] = {0}; int err = 0; if(gethostbyname_r(IP, &phost, tmp, sizeof(tmp), &hp, &err) == 0 && hp) addr = *((u32*)phost.h_addr_list[0]); else addr = INADDR_NONE; // failed to resolve hostname } if(addr == INADDR_NONE) { // errno's message not very useful: "Resource temporarily unavailable" //perror("can't resolve host "); break; } s = socket(AF_INET, SOCK_STREAM, 0); if(s < 0) break; // most unlikely //printf("connecting to %s%s...\n", IP, PORT); struct sockaddr_in host; bzero(&host, sizeof(host)); host.sin_family = AF_INET; host.sin_addr.s_addr = addr; host.sin_port = htons(port); so_timeout(s, 500); // Linux kernel 2.3.41+ required for connect() ret = connect(s, (struct sockaddr*)&host, sizeof(host)); if(ret) // 0:OK { //perror("can't connect() "); // redundant with our own error break; } int len = sprintf(buf, "GET %s HTTP/1.1\r\n" "Host: %s%s\r\n" "User-Agent: a.c\r\n" "Accept-Encoding: gzip\r\n" "Connection: close\r\n\r\n", request, IP, PORT); ret = write(s, buf, len); if(ret != len) break; // most unlikely here len = read_line(s, buf, sizeof(buf) - 1); if(len <= 0) break; printf("=> %s\n", buf); if(f) fprintf(f, "=> %s\n", buf); // don't do that with production code: if(sscanf(buf, "HTTP/1.%*d %3d", (int*)&ret) != 1) break; if(ret > 0) // ret is the HTTP status, parse the server reply { for(*buf = 0;;) { int n = read_line(s, buf, sizeof(buf) - 1); if(n <= 0) break; buf[n] = 0; printf(" %s\n", buf); if(f) fprintf(f, " %s\n", buf); char *p = buf; for(; *p && *p != ':'; p++) *p = tolower(*p); sscanf(buf, "content-length: %d", &len); } // print beginning of fetched resource (if printable) len = (len > (sizeof(buf) - 1)) ? 
(sizeof(buf) - 1) : len; len = read_len(s, buf, len); if(len > 0 && isalpha(buf[0]) && isalpha(buf[1])) { buf[len] = 0; printf("=> %.40s...\n", buf); if(f) fprintf(f, "=> %.40s...\n", buf); } } break; } while(0); close(s); return ret; } // ============================================================================ // End of Source Code // ============================================================================