typedef u_long iter_t
typedef void (*benchmp_f)(iter_t iterations, void* cookie)
void benchmp(benchmp_f initialize, benchmp_f benchmark, benchmp_f cleanup, int enough, int parallel, int warmup, int repetitions, void* cookie)
uint64 get_n()
void milli(char *s, uint64 n)
void micro(char *s, uint64 n)
void nano(char *s, uint64 n)
void mb(uint64 bytes)
void kb(uint64 bytes)
Two attributes are critical for performance: latency and bandwidth. lmbench’s timing harness makes it easy to measure and report results for both. Latency is usually important for frequently executed operations, and bandwidth is usually important when moving large chunks of data.
There are a number of factors to consider when building benchmarks.
The timing harness requires that the benchmarked operation be idempotent so that it can be repeated indefinitely.
The timing subsystem, benchmp, is passed up to three function pointers. Some benchmarks may need as few as one function pointer (for benchmark).
/*
 * Benchmark callback for the lrand48() latency example.
 *
 * Calls lrand48() once per requested iteration and discards the result.
 * The cookie argument is not used by this benchmark.
 */
void
benchmark_lrand48(iter_t iterations, void* cookie) {
	for (; iterations > 0; iterations--) {
		lrand48();
	}
}
/*
 * Example driver: times lrand48() through the benchmp() harness and
 * hands the label "lrand48()" together with get_n() to micro() for
 * reporting.
 */
int
main(int argc, char *argv[])
{
	/*
	 * Only the benchmark callback is supplied; the initialize and
	 * cleanup callbacks and the cookie are all NULL.  The remaining
	 * arguments are enough=0, parallel=1, warmup=0, repetitions=TRIES.
	 */
	benchmp(NULL, benchmark_lrand48, NULL, 0, 1, 0, TRIES, NULL);

	/* The label must be a quoted string literal. */
	micro("lrand48()", get_n());
	exit(0);
}
Here is a simple benchmark that measures and reports the bandwidth of bcopy:
/* One megabyte, in bytes. */
#define MB (1024 * 1024)
/* Buffer size used by the bcopy example: 8 MB. */
#define SIZE (8 * MB)
/*
 * Per-benchmark state handed to the bcopy callbacks through the
 * harness's opaque cookie pointer.
 */
struct _state {
/* Number of bytes allocated for each buffer and copied per bcopy() call. */
int size;
/* Buffer passed as the first (source) argument to bcopy(). */
char* a;
/* Buffer passed as the second (destination) argument to bcopy(). */
char* b;
};
/*
 * Initialization callback for the bcopy example.
 *
 * Does nothing when iterations is zero.  Otherwise allocates the two
 * buffers of state->size bytes each and stores them in the state
 * structure passed via cookie.  Terminates the process with status 1
 * if either allocation fails.
 */
void
initialize_bcopy(iter_t iterations, void* cookie) {
	struct _state* st = (struct _state*)cookie;

	if (iterations == 0) {
		return;
	}

	st->a = malloc(st->size);
	st->b = malloc(st->size);

	if (!st->a || !st->b) {
		exit(1);
	}
}
/*
 * Benchmark callback for the bcopy example.
 *
 * Copies state->size bytes from state->a to state->b once per
 * requested iteration.  The state structure arrives via cookie.
 */
void
benchmark_bcopy(iter_t iterations, void* cookie) {
	struct _state* st = (struct _state*)cookie;

	for (; iterations > 0; iterations--) {
		bcopy(st->a, st->b, st->size);
	}
}
/*
 * Cleanup callback for the bcopy example.
 *
 * Releases both buffers held in the state structure passed via cookie.
 * Does nothing when iterations is zero.
 */
void
cleanup_bcopy(iter_t iterations, void* cookie) {
	struct _state* st = (struct _state*)cookie;

	if (iterations != 0) {
		free(st->a);
		free(st->b);
	}
}
int
main(int argc, char *argv[])
{
struct _state state;
state.size = SIZE;
benchmp(initialize_bcopy, benchmark_bcopy, cleanup_bcopy,
0, 1, 0, TRIES, &state);
mb(get_n() * state.size);
exit(0);
}
A slightly more complex version of the bcopy benchmark might measure bandwidth as a function of memory size and parallelism. The main procedure in this case might look something like this:
/*
 * Sweep buffer sizes from 64 bytes up to SIZE (doubling each step) and,
 * for each size, parallelism levels 1, 2, 4, 8 and 16.
 */
for (size = 64; size <= SIZE; size <<= 1) {
	for (par = 1; par < 32; par <<= 1) {
		state.size = size;
		benchmp(initialize_bcopy, benchmark_bcopy,
			cleanup_bcopy, 0, par, 0, TRIES, &state);
		/*
		 * Prefix the result with the parameters of this run.
		 * NOTE(review): assumes size and par are ints, matching
		 * state.size and benchmp()'s parallel argument.
		 */
		fprintf(stderr, "%d\t%d\t", size, par);
		/* Scale the byte count by the parallelism level of the run. */
		mb(par * get_n() * state.size);
	}
}
exit(0);
}
Comments, suggestions, and bug reports are always welcome.