// Author: Pete Kirkham // string match benchmark - mmap io, KMP algorithm. #include #include #include #include #include #include void kmp_table (const char*const string, size_t length, int* table) { size_t i(2); int j(0); table[0] = -1; table[1] = 0; for (; i < length; ) { if (string[i - 1] == string[j]) { table[i] = j + 1; ++i; ++j; } else if (j > 0) { j = table[j]; } else { table[i] = 0; ++i; } } } size_t kmp_search (const char* pattern, const int* table, size_t pattern_length, const char* string, size_t string_length) { for (size_t m(0), i(0); (m+1) < string_length; ) { if (pattern[i] == string[m + i]) { i = i + 1; if (i == pattern_length) { return m; } } else { m = m + i - table[i]; if (i > 0) { i = table[i]; } } } return string_length; } int main (int narg, char** argvp) { if (narg < 2) { std::cout << "The name of the input file is required." << std::endl; return 1; } // open the file int file(open(argvp[1], O_RDONLY)); if (!file) { std::cout << "failed to open " << argvp[1] << std::endl; return 2; } // find length struct stat file_stat; if (fstat(file, &file_stat)) { std::cout << "failed to stat " << argvp[1] << std::endl; return 2; } // generate KMP table const char* pattern("GET /ongoing/When/"); size_t pattern_length(strlen(pattern)); int* table(new int[pattern_length]); kmp_table(pattern, pattern_length, table); // mmap file size_t file_length(file_stat.st_size); void* mfile(mmap(0, file_length, PROT_READ, MAP_PRIVATE, file, 0)); if (!mfile) { std::cout << "failed to mmap " << argvp[1] << std::endl; return 2; } const char* buf(reinterpret_cast(mfile)); size_t matches(0); // scan for matches directly for (size_t index(0); index < file_length; ++index) { index += kmp_search(pattern, table, pattern_length, buf + index, file_length - index); index += pattern_length; while (index < file_length) { char ch(buf[index]); if ((ch == '.') || (ch == '\n')) { break; } if (ch == ' ') { ++matches; break; } ++index; } } munmap(mfile, file_length); delete [] table; std::cout << "matches: " << matches << std::endl; return 0; }