我在单个表 (Engine=InnoDB) 上运行 select,不太复杂但有很多行。
对某个 id 的第一次查询比较慢(表中有 900 万行,需要几秒钟);之后针对同一个 id 的查询要快得多,即使我更改了查询条件也是如此。
我在 Windows 上尝试过 mysql,在 Linux 上尝试过 mariadb。
-- Count the rows of one item, and total their `counts`, where the
-- [started, ended] interval overlaps the window 2000-01-03 .. 2000-01-04.
select
    `id`,
    count(*),
    sum(`counts`)
from reference
where `id` = 848
  and `started` <= '2000-01-04 00:00:00'
  and `ended` >= '2000-01-03 00:00:00';

-- Smallest and largest `counts` value recorded for the same item.
select
    min(`counts`),
    max(`counts`)
from reference
where `id` = 848;
当我在 Linux 上的 MariaDB 上运行时,紧接着的下一个(相邻的)id 的查询有时也很快;但在 Windows 上的 MySQL 上,每个新 id 的第一次查询同样很慢。这让我觉得也许我遗漏了什么。
-- Test schema: a small parent table (items) and a large child table
-- (reference) whose rows point back to items through a foreign key.
create database my_test_db default character set utf8;
use my_test_db;

-- One row per item; `name` is unique but nullable.
create table items (
    `id` int(11) not null auto_increment,
    `name` varchar(50),
    `description` varchar(250) default '',
    primary key (`id`),
    unique key item_name_unique (`name`)
) engine=InnoDB;

-- One row per (item, time slot). The table has no primary key; it only has
-- single-column secondary indexes on `id`, `started` and `ended`.
create table reference (
    `id` int(11) not null,
    `started` datetime not null,
    `ended` datetime not null,
    `counts` int(11) not null,
    key fk_item_id_idx (`id`),
    key idx_started (`started`),
    key idx_ended (`ended`),
    constraint fk_item_id foreign key (`id`) references items (`id`)
        on delete no action on update no action
) engine=InnoDB;
MariaDB [my_test_db]> describe items;
| Field | Type | Null | Key | Default | Extra |
| id | int(11) | NO | PRI | NULL | auto_increment |
| name | varchar(50) | YES | UNI | NULL | |
| description | varchar(250) | YES | | | |
3 rows in set (0.00 sec)
MariaDB [my_test_db]> describe reference;
| Field | Type | Null | Key | Default | Extra |
| id | int(11) | NO | MUL | NULL | |
| started | datetime | NO | MUL | NULL | |
| ended | datetime | NO | MUL | NULL | |
| counts | int(11) | NO | | NULL | |
4 rows in set (0.00 sec)
我使用以下程序创建了 2 个 infile:
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iostream>
#include <random>
#include <sstream>
#include <string>
#define TM_BUF_SIZE 32

// POSIX offers localtime_r(time, tm); the Microsoft CRT offers
// localtime_s(tm, time). Map the latter onto the former everywhere except
// Windows. _WIN32 is checked as well because WIN32 is not predefined by every
// Windows compiler configuration.
#if !defined(WIN32) && !defined(_WIN32)
#define localtime_s(PTM,PTIME_T) localtime_r(PTIME_T,PTM)
#endif

/*
 * DataGen: writes two tab-separated files into the current directory.
 *
 *   items.dat       one line per item:  id, "id-<id>", "item.number.<id>"
 *   references.dat  for every time slot and every item:
 *                   id, slot start, slot end, random count in [0, 15]
 *
 * Arguments:
 *   argv[1]  item_count      number of items (ids run from 1 to item_count)
 *   argv[2]  time_iteration  number of consecutive time slots
 *   argv[3]  time_frame      length of one time slot in seconds
 *
 * Time slots start at 2000-01-01 00:00:00 local time.
 * Returns 0 on success and -1 on bad arguments or when a file cannot be opened.
 */
int main(int argc, char** argv)
{
    if (argc < 4)
    {
        std::cerr << "Missing Arguments!!" << std::endl;
        std::cerr << "Usage: DataGen item_count time_iteration time_frame" << std::endl;
        return -1;
    }

    const int id_max = (int)strtol(argv[1], nullptr, 0);
    const int count_iterations = (int)strtol(argv[2], nullptr, 0);
    const int time_frame = (int)strtol(argv[3], nullptr, 0);

    // strtol yields 0 for non-numeric input; reject that and negative values
    // instead of silently producing empty files.
    if (id_max <= 0 || count_iterations <= 0 || time_frame <= 0)
    {
        std::cerr << "Arguments must be positive integers" << std::endl;
        std::cerr << "Usage: DataGen item_count time_iteration time_frame" << std::endl;
        return -1;
    }

    std::random_device r;
    std::default_random_engine re(r());
    std::uniform_int_distribution<int> uni_dist(0, 15);

    std::ofstream fitems("items.dat");
    if (!fitems)
    {
        std::cerr << "Cannot open items.dat for writing" << std::endl;
        return -1;
    }
    // '\n' rather than std::endl: same bytes in the file, no flush per row.
    for (int id = 1; id <= id_max; id++)
        fitems << id << "\tid-" << id << "\titem.number." << id << '\n';
    fitems.close();

    // First slot starts at 2000-01-01 00:00:00 (tm_year counts from 1900).
    std::tm temp, tmStart, tmEnd;
    std::memset(&temp, 0, sizeof(temp));
    temp.tm_year = 100;
    temp.tm_mday = 1;
    time_t ts_start = mktime(&temp);
    time_t ts_end;

    std::ofstream frefs("references.dat");
    if (!frefs)
    {
        std::cerr << "Cannot open references.dat for writing" << std::endl;
        return -1;
    }

    char bufStart[TM_BUF_SIZE], bufEnd[TM_BUF_SIZE];
    int iteration_left = count_iterations;
    while (iteration_left-- > 0)
    {
        ts_end = ts_start + time_frame;
        localtime_s(&tmStart, &ts_start);
        localtime_s(&tmEnd, &ts_end);
        std::strftime(bufStart, TM_BUF_SIZE, "%Y-%m-%d %H:%M:%S.0", &tmStart);
        std::strftime(bufEnd, TM_BUF_SIZE, "%Y-%m-%d %H:%M:%S.0", &tmEnd);

        // One row per item for the current time slot.
        for (int id = 1; id <= id_max; id++)
        {
            int count = uni_dist(re);
            frefs << id << "\t" << bufStart << "\t" << bufEnd << "\t" << count << '\n';
        }

        // The next slot begins where this one ended.
        ts_start = ts_end;

        // Progress report every 100 slots.
        if (iteration_left && 0 == iteration_left % 100)
            std::cout << iteration_left << " iterations left" << std::endl;
    }

    return 0;
}
使用以下命令在 Linux 上编译它:
g++ -std=c++0x dataGen.cpp -o DataGen
像这样运行 DataGen 程序:
DataGen 3000 3000 60
该程序创建 2 个文件:“items.dat”和“references.dat”
-- Bulk-load the two tab-separated files written by DataGen into the test schema.
use my_test_db;
-- Load items before reference: reference.id has a foreign key to items.id.
load data infile '/root/items.dat' into table items;
load data infile '/root/references.dat' into table reference;
这样我就向表中填充了大量数据:`items` 表有 3000 行,`reference` 表有 900 万行。
#first time for this id:
MariaDB [my_test_db]> select `id`,count(*), sum(`counts`) from reference where `id`=848 and `started`<= '2000-01-03 00:00:00' and `ended`>='2000-01-02 00:00:00';
| id | count(*) | sum(`counts`) |
| 848 | 1442 | 10640 |
1 row in set (3.31 sec)
#next query for same id change time filters:
MariaDB [my_test_db]> select `id`,count(*), sum(`counts`) from reference where `id`=848 and `started`<= '2000-01-04 00:00:00' and `ended`>='2000-01-03 00:00:00';
| id | count(*) | sum(`counts`) |
| 848 | 121 | 944 |
1 row in set (0.03 sec)
#next query for same id change time filters again:
MariaDB [my_test_db]> select `id`,count(*), sum(`counts`) from reference
where `id`=848
and `started`<= '2000-01-02 00:00:00'
and `ended` >= '2000-01-01 00:00:00';
| id | count(*) | sum(`counts`) |
| 848 | 1441 | 10848 |
1 row in set (0.06 sec)
-- 只改变 id:
MariaDB [my_test_db]> select `id`,count(*), sum(`counts`) from reference
where `id`=1848
and `started`<= '2000-01-02 00:00:00'
and `ended` >= '2000-01-01 00:00:00';
| id | count(*) | sum(`counts`) |
| 1848 | 1441 | 10576 |
1 row in set (2.63 sec)
#use same id change time filters:
MariaDB [my_test_db]> select `id`,count(*), sum(`counts`) from reference
where `id`=1848
and `started`<= '2000-01-02 12:00:00'
and `ended` >= '2000-01-01 12:00:00';
| id | count(*) | sum(`counts`) |
| 1848 | 1442 | 10780 |
1 row in set (0.03 sec)
#use consequent id is also fast:
MariaDB [my_test_db]> select `id`,count(*), sum(`counts`) from reference
where `id`=1849
and `started`<= '2000-01-02 12:00:00'
and `ended` >= '2000-01-01 12:00:00';
| id | count(*) | sum(`counts`) |
| 1849 | 1442 | 11001 |
1 row in set (0.11 sec)
-- 其他查询 - 相同的 id - 快速
MariaDB [my_test_db]> select min(counts),max(counts) from reference where `id`=1849 ;
| min(counts) | max(counts) |
| 0 | 15 |
1 row in set (0.03 sec)
#again it is slow for other id
MariaDB [my_test_db]> select min(counts),max(counts) from reference where `id`=1800 ;
| min(counts) | max(counts) |
| 0 | 15 |
1 row in set (2.36 sec)
-- 描述查询:
MariaDB [my_test_db]> describe select `id`,count(*), sum(`counts`) from reference where `id`=1849 and `started`<= '2000-01-02 12:00:00' and `ended`>='2000-01-01 12:00:00';
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
| 1 | SIMPLE | reference | ref | fk_item_id_idx,idx_started,idx_ended | fk_item_id_idx | 4 | const | 2999 | Using where |
1 row in set (0.00 sec)
正如 Gerald 解释的那样,这是因为从磁盘加载数据。
请思考对 `reference` 表来说,什么样的主键/索引才有意义。
另请注意 `innodb_buffer_pool_size` 这个设置——它控制着缓存。如果您有超过 4GB 的 RAM,我建议将大约 70% 的可用 RAM 分配给该设置。

要是所有的提问都能写得这么详尽就好了,可惜这里其实并没有提出一个真正的问题。大多数数据库都有内存缓存:当您查询之前已经查询过的数据时,结果可以直接在内存中找到,无需访问磁盘或其他存储。所用的时间取决于返回的行数和所需的处理量。将 datetime 与字符串进行比较可能开销很大,希望您的数据库是以相反的方式处理的(先把字符串转换成 datetime 再比较)。