mirror of
https://github.com/pocoproject/poco.git
synced 2025-10-24 09:12:28 +02:00
fix(SQLParser): move to Data dir; add extradirs, remove vs 140,150 build scripts generation
This commit is contained in:
14
Data/SQLParser/benchmark/README.md
Normal file
14
Data/SQLParser/benchmark/README.md
Normal file
@@ -0,0 +1,14 @@
|
||||
# Benchmark
|
||||
|
||||
This directory contains the scripts to execute benchmarks of the parser. We use [Google Benchmark](https://github.com/google/benchmark) to define and run benchmarks.
|
||||
|
||||
## Install Google Benchmark
|
||||
|
||||
```bash
|
||||
cmake -DCMAKE_BUILD_TYPE=Release .
|
||||
|
||||
make
|
||||
|
||||
make install
|
||||
```
|
||||
|
||||
28
Data/SQLParser/benchmark/benchmark.cpp
Normal file
28
Data/SQLParser/benchmark/benchmark.cpp
Normal file
@@ -0,0 +1,28 @@
|
||||
#include "benchmark/benchmark.h"
|
||||
|
||||
#include "benchmark_utils.h"
|
||||
#include "queries.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
// Create parse and tokenize benchmarks for TPC-H queries.
|
||||
const auto tpch_queries = getTPCHQueries();
|
||||
for (const auto& query : tpch_queries) {
|
||||
std::string p_name = query.first + "-parse";
|
||||
benchmark::RegisterBenchmark(p_name.c_str(), &BM_ParseBenchmark, query.second);
|
||||
std::string t_name = query.first + "-tokenize";
|
||||
benchmark::RegisterBenchmark(t_name.c_str(), &BM_TokenizeBenchmark, query.second);
|
||||
}
|
||||
|
||||
// Create parse and tokenize benchmarks for all queries in sql_queries array.
|
||||
for (unsigned i = 0; i < sql_queries.size(); ++i) {
|
||||
const auto& query = sql_queries[i];
|
||||
std::string p_name = getQueryName(i) + "-parse";
|
||||
benchmark::RegisterBenchmark(p_name.c_str(), &BM_ParseBenchmark, query.second);
|
||||
|
||||
std::string t_name = getQueryName(i) + "-tokenize";
|
||||
benchmark::RegisterBenchmark(t_name.c_str(), &BM_TokenizeBenchmark, query.second);
|
||||
}
|
||||
|
||||
benchmark::Initialize(&argc, argv);
|
||||
benchmark::RunSpecifiedBenchmarks();
|
||||
}
|
||||
44
Data/SQLParser/benchmark/benchmark_utils.cpp
Normal file
44
Data/SQLParser/benchmark/benchmark_utils.cpp
Normal file
@@ -0,0 +1,44 @@
|
||||
#include "benchmark_utils.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
#include "SQLParser.h"
|
||||
|
||||
// Tokenizes `query` with hsql::SQLParser::tokenize and returns how many
// tokens were collected.
size_t getNumTokens(const std::string& query) {
  std::vector<int16_t> token_ids;
  hsql::SQLParser::tokenize(query, &token_ids);

  const size_t token_count = token_ids.size();
  return token_count;
}
|
||||
|
||||
// Benchmark body that measures tokenizing `query`.
//
// Records the token count and the character count of the query as user
// counters, then tokenizes the query once per benchmark iteration.
void BM_TokenizeBenchmark(benchmark::State& st, const std::string& query) {
  st.counters["num_tokens"] = getNumTokens(query);
  st.counters["num_chars"] = query.size();

  while (st.KeepRunning()) {
    // Reserve capacity instead of constructing 512 zero-valued elements:
    // tokenize() grows the vector (see getNumTokens), so a pre-sized vector
    // would be zero-filled and then reallocated on the very first token.
    std::vector<int16_t> tokens;
    tokens.reserve(512);
    hsql::SQLParser::tokenize(query, &tokens);
  }
}
|
||||
|
||||
// Benchmark body that measures parsing `query`.
//
// Records the token count and the character count of the query as user
// counters, then parses the query once per benchmark iteration. If a parse
// result is not valid, the query and the parser's error message are printed
// to stdout and the benchmark is flagged via SkipWithError.
void BM_ParseBenchmark(benchmark::State& st, const std::string& query) {
  st.counters["num_tokens"] = getNumTokens(query);
  st.counters["num_chars"] = query.size();

  while (st.KeepRunning()) {
    hsql::SQLParserResult parsed;
    hsql::SQLParser::parse(query, &parsed);

    if (parsed.isValid()) continue;

    std::cout << query << std::endl << parsed.errorMsg() << std::endl;
    st.SkipWithError("Parsing failed!");
  }
}
|
||||
|
||||
// Reads the whole file at `file_path` into a string and returns it.
// If the file cannot be opened, the returned string is empty.
std::string readFileContents(const std::string& file_path) {
  std::ifstream input(file_path.c_str());

  const std::istreambuf_iterator<char> first(input);
  const std::istreambuf_iterator<char> last;
  return std::string(first, last);
}
|
||||
41
Data/SQLParser/benchmark/benchmark_utils.h
Normal file
41
Data/SQLParser/benchmark/benchmark_utils.h
Normal file
@@ -0,0 +1,41 @@
|
||||
#ifndef __BENCHMARK_UTILS_H__
#define __BENCHMARK_UTILS_H__

// Standard headers for the names this header itself uses (std::chrono in the
// timing macros, size_t and std::string in the declarations), so it does not
// depend on what benchmark.h happens to include.
#include <chrono>
#include <cstddef>
#include <string>

#include "benchmark/benchmark.h"

// Returns the number of tokens hsql::SQLParser::tokenize produces for `query`.
size_t getNumTokens(const std::string& query);

// Benchmark body: tokenizes `query` once per iteration and reports the
// "num_tokens" and "num_chars" counters.
void BM_TokenizeBenchmark(benchmark::State& st, const std::string& query);

// Benchmark body: parses `query` once per iteration and reports the
// "num_tokens" and "num_chars" counters. Flags the benchmark with an error
// if the parse result is not valid.
void BM_ParseBenchmark(benchmark::State& st, const std::string& query);

// Returns the full contents of the file at `file_path` (empty if the file
// cannot be opened).
std::string readFileContents(const std::string& file_path);

// Difference between two time points as a std::chrono::duration<double>.
// The arguments are parenthesised so that expressions can be passed safely.
// NOTE: the expansion ends with a semicolon (kept for compatibility), so the
// macro can only be used as the last part of a statement.
#define TIME_DIFF(end, start)\
  std::chrono::duration_cast<std::chrono::duration<double>>((end) - (start));

// Current time point of std::chrono::high_resolution_clock.
// NOTE: the expansion ends with a semicolon (kept for compatibility).
#define NOW()\
  std::chrono::high_resolution_clock::now();

// Defines and registers a static benchmark `name` that parses `query`.
#define PARSE_QUERY_BENCHMARK(name, query)\
  static void name(benchmark::State& st) {\
    BM_ParseBenchmark(st, query);\
  }\
  BENCHMARK(name);

// Defines and registers a static benchmark `name` that tokenizes `query`.
#define TOKENIZE_QUERY_BENCHMARK(name, query)\
  static void name(benchmark::State& st) {\
    BM_TokenizeBenchmark(st, query);\
  }\
  BENCHMARK(name);

// Defines and registers both benchmarks for `query`:
// `<test_name>Tokenize` and `<test_name>Parse`.
#define BENCHMARK_QUERY(test_name, query)\
  TOKENIZE_QUERY_BENCHMARK(test_name##Tokenize, query)\
  PARSE_QUERY_BENCHMARK(test_name##Parse, query)

#endif
|
||||
87
Data/SQLParser/benchmark/parser_benchmark.cpp
Normal file
87
Data/SQLParser/benchmark/parser_benchmark.cpp
Normal file
@@ -0,0 +1,87 @@
|
||||
|
||||
#include <chrono>
|
||||
#include <sstream>
|
||||
#include "benchmark/benchmark.h"
|
||||
|
||||
#include "SQLParser.h"
|
||||
#include "parser/bison_parser.h"
|
||||
#include "parser/flex_lexer.h"
|
||||
|
||||
#include "benchmark_utils.h"
|
||||
|
||||
// Benchmark the influence of increasing size of the query, while
|
||||
// the number of tokens remains unchanged.
|
||||
static void BM_CharacterCount(benchmark::State& st) {
|
||||
const size_t querySize = st.range(0);
|
||||
|
||||
// Base query has size of 18 characters.
|
||||
std::string query = "SELECT %name% FROM test;";
|
||||
|
||||
const uint pad = querySize - 18;
|
||||
const std::string filler = std::string(pad, 'a');
|
||||
query.replace(7, 6, filler);
|
||||
|
||||
st.counters["num_tokens"] = getNumTokens(query);
|
||||
st.counters["num_chars"] = query.size();
|
||||
while (st.KeepRunning()) {
|
||||
hsql::SQLParserResult result;
|
||||
hsql::SQLParser::parse(query, &result);
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_CharacterCount)
|
||||
->RangeMultiplier(1 << 2)
|
||||
->Ranges({{1 << 5, 1 << 15},
|
||||
{5, 5}});
|
||||
|
||||
// Benchmark the influence of increasing number of tokens, while
|
||||
// the number of characters remains unchanged.
|
||||
static void BM_ConditionalTokens(benchmark::State& st) {
|
||||
const size_t targetSize = st.range(0);
|
||||
const size_t numTokens = st.range(1);
|
||||
|
||||
// Base query contains 6 tokens.
|
||||
std::string query = "SELECT * FROM test";
|
||||
|
||||
// Create conditional.
|
||||
std::stringstream condStream;
|
||||
size_t missingTokens = numTokens - 4;
|
||||
if (missingTokens > 0) {
|
||||
condStream << " WHERE a";
|
||||
missingTokens -= 2;
|
||||
|
||||
while (missingTokens > 0) {
|
||||
condStream << " AND a";
|
||||
missingTokens -= 2;
|
||||
}
|
||||
}
|
||||
|
||||
query += condStream.str();
|
||||
|
||||
if (targetSize >= query.size()) {
|
||||
const size_t pad = targetSize - query.size();
|
||||
const std::string filler = std::string(pad, 'a');
|
||||
query.replace(7, 1, filler);
|
||||
|
||||
} else {
|
||||
// Query can't be the same length as in the other benchmarks.
|
||||
// Running this will result in unusable data.
|
||||
fprintf(stderr, "Too many tokens. Query too long for benchmark char limit (%lu > %lu).\n",
|
||||
query.size(), targetSize);
|
||||
return;
|
||||
}
|
||||
|
||||
st.counters["num_tokens"] = getNumTokens(query);
|
||||
st.counters["num_chars"] = query.size();
|
||||
while (st.KeepRunning()) {
|
||||
hsql::SQLParserResult result;
|
||||
hsql::SQLParser::parse(query, &result);
|
||||
if (!result.isValid()) st.SkipWithError("Parsing failed!");
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_ConditionalTokens)
|
||||
->RangeMultiplier(1 << 2)
|
||||
->Ranges({{1 << 14, 1 << 14},
|
||||
{1 << 2, 1 << 11}});
|
||||
|
||||
|
||||
|
||||
47
Data/SQLParser/benchmark/queries.cpp
Normal file
47
Data/SQLParser/benchmark/queries.cpp
Normal file
@@ -0,0 +1,47 @@
|
||||
#include "queries.h"
|
||||
|
||||
#include <filesystem>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <regex>
|
||||
|
||||
#include "benchmark_utils.h"
|
||||
|
||||
namespace filesystem = std::filesystem;
|
||||
|
||||
std::string getQueryName(unsigned i) {
|
||||
if (sql_queries[i].first.empty()) {
|
||||
std::string name = "#" + std::to_string(i + 1);
|
||||
return name;
|
||||
}
|
||||
return std::string("") + sql_queries[i].first;
|
||||
}
|
||||
|
||||
std::vector<SQLQuery> getQueriesFromDirectory(const std::string& dir_path) {
|
||||
std::regex query_file_regex("\\.sql$");
|
||||
std::vector<std::string> files;
|
||||
|
||||
for (auto& entry : filesystem::directory_iterator(dir_path)) {
|
||||
if (filesystem::is_regular_file(entry)) {
|
||||
std::string path_str = filesystem::path(entry);
|
||||
|
||||
if (std::regex_search(path_str, query_file_regex)) {
|
||||
files.push_back(path_str);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(files.begin(), files.end());
|
||||
|
||||
std::vector<SQLQuery> queries;
|
||||
for (const std::string& file_path : files) {
|
||||
const filesystem::path p(file_path);
|
||||
const std::string query = readFileContents(file_path);
|
||||
queries.emplace_back(p.filename(), query);
|
||||
}
|
||||
return queries;
|
||||
}
|
||||
|
||||
std::vector<SQLQuery> getTPCHQueries() {
|
||||
return getQueriesFromDirectory("test/queries/");
|
||||
}
|
||||
56
Data/SQLParser/benchmark/queries.h
Normal file
56
Data/SQLParser/benchmark/queries.h
Normal file
@@ -0,0 +1,56 @@
|
||||
#ifndef __QUERIES_H__
#define __QUERIES_H__

#include <string>
#include <utility>
#include <vector>

// A benchmark query: first = name, second = SQL text.
using SQLQuery = std::pair<std::string, std::string>;

// Built-in benchmark queries (name, query).
// Declared `inline` so that all translation units including this header share
// one vector instead of each getting its own `static` copy.
inline std::vector<SQLQuery> sql_queries = {
  {"Q1", "SELECT * FROM test;"},
  {"Q2", "SELECT a, b AS address FROM (SELECT * FROM test WHERE c < 100 AND b > 3) t1 WHERE a < 10 AND b < 100;"},
  {"Q3", "SELECT \"left\".a, \"left\".b, \"right\".a, \"right\".b FROM table_a AS \"left\" JOIN table_b AS \"right\" ON \"left\".a = \"right\".a;"},
  {"Q4", ""
    "SELECT"
    " l_orderkey,"
    " SUM(l_extendedprice * (1 - l_discount)) AS revenue,"
    " o_orderdate,"
    " o_shippriority"
    " FROM"
    " customer,"
    " orders,"
    " lineitem"
    " WHERE"
    " c_mktsegment = '%s'"
    " and c_custkey = o_custkey"
    " and l_orderkey = o_orderkey"
    " and o_orderdate < '%s'"
    " and l_shipdate > '%s'"
    " GROUP BY"
    " l_orderkey,"
    " o_orderdate,"
    " o_shippriority"
    " ORDER BY"
    " revenue DESC,"
    " o_orderdate;"
  },

  {"LongSelectList26", "SELECT a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z FROM test;"},
  {"LongSelectElement26", "SELECT abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxy FROM test;"},
  {"LongSelectList52", "SELECT a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z FROM test;"},
  {"LongSelectElement52", "SELECT abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxy FROM test;"},
  {"TwoSelects", "SELECT * FROM test; SELECT age, street AS address FROM data;"},
  {"ExecuteNoParams", "EXECUTE procedure;"},
  {"Execute2Params", "EXECUTE procedure(11, 'test');"},
  {"Execute10Params", "EXECUTE procedure(11, 'test', 5.6, 4.2, 'abc', 6, 7, 8, 9, 10000);"},
  // {"name", "query"},
};

// Returns the name of the i-th entry of sql_queries, or "#<i + 1>" if that
// entry has no name.
std::string getQueryName(unsigned i);

// Loads every ".sql" file found in `dir_path` as a (file name, contents) pair.
std::vector<SQLQuery> getQueriesFromDirectory(const std::string& dir_path);

// Loads the queries stored in the "test/queries/" directory.
std::vector<SQLQuery> getTPCHQueries();

#endif
|
||||
Reference in New Issue
Block a user