表架构:
-- Sequence that supplies default values for fsa_online.id.
CREATE SEQUENCE fsa_online_id_seq
    INCREMENT 1
    MINVALUE 1
    MAXVALUE 9223372036854775807
    START 1
    CACHE 1;

-- Head table: one row per fsa_uuid (enforced by the unique constraint).
-- use_version and last_original_version are nullable integer pointers;
-- their foreign keys to fsa_online_data(id) are added further down in the
-- script, once that table exists.
-- deleted_at is nullable; the queries in this document treat NULL as
-- "not deleted".
CREATE TABLE public.fsa_online (
    id                    integer      DEFAULT nextval('fsa_online_id_seq') NOT NULL,
    fsa_uuid              uuid         NOT NULL,
    use_version           integer,
    last_original_version integer,
    created_at            timestamp(0),
    updated_at            timestamp(0),
    deleted_at            timestamp(0),
    is_drug               boolean      DEFAULT true NOT NULL,
    CONSTRAINT fsa_online_pkey PRIMARY KEY (id),
    CONSTRAINT fsa_online_fsa_uuid_unique UNIQUE (fsa_uuid)
) WITH (oids = false);

-- Single-column b-tree indexes on the flag and on both version pointers.
CREATE INDEX fsa_online_is_drug_index
    ON public.fsa_online USING btree (is_drug);
CREATE INDEX fsa_online_last_original_version_index
    ON public.fsa_online USING btree (last_original_version);
CREATE INDEX fsa_online_use_version_index
    ON public.fsa_online USING btree (use_version);
-- Sequence that supplies default values for fsa_online_data.id.
CREATE SEQUENCE fsa_online_data_id_seq INCREMENT 1 MINVALUE 1 MAXVALUE 9223372036854775807 START 1 CACHE 1;
-- Version/detail rows: each row belongs to exactly one fsa_online row via
-- fsa_id (NOT NULL, foreign key below). Most payload columns are nullable;
-- structured payloads are stored as jsonb, free text as text.
CREATE TABLE "public"."fsa_online_data" (
"id" integer DEFAULT nextval('fsa_online_data_id_seq') NOT NULL,
"fsa_id" integer NOT NULL,
"reason" text,
"is_original" boolean NOT NULL,
"is_published" boolean DEFAULT true NOT NULL,
"created_by_id" integer,
"created_at" timestamp(0),
"unparsed_data" jsonb,
"raw_id" integer NOT NULL,
"status_id" integer,
"type_id" integer,
"reg_num" character varying(255),
"start_date" date,
"end_date" date,
"docs" jsonb,
"docs_add" text,
"scheme" text,
"free_form" jsonb,
"fio_expert" text,
"lab_info" jsonb,
"change_info" jsonb,
"applicant_info" jsonb,
"manufacturer_info" jsonb,
"product_info" jsonb,
"standard_info" jsonb,
"manufacturer_tbl_info" jsonb,
"product_tbl_info" jsonb,
"certification_info" jsonb,
"trade_name" text,
"cert_num" character varying(255),
"man_form_txt" text,
"manufacturer_name" text,
"man_country_id" integer,
"serial_num" character varying(255),
"serial_size" integer,
"barcode" character varying(255),
"barcode_type_id" integer,
"is_cert" boolean NOT NULL,
"original_data" jsonb,
CONSTRAINT "fsa_online_data_pkey" PRIMARY KEY ("id"),
-- Parent link: updates to fsa_online.id cascade; deleting a parent row that
-- still has data rows is rejected (RESTRICT).
CONSTRAINT "fsa_online_data_fsa_id_foreign" FOREIGN KEY (fsa_id) REFERENCES fsa_online(id) ON UPDATE CASCADE ON DELETE RESTRICT NOT DEFERRABLE
) WITH (oids = false);
-- Single-column b-tree indexes. Note that cert_num and trade_name are indexed
-- with the column's default collation and default operator class.
CREATE INDEX "fsa_online_data_barcode_index" ON "public"."fsa_online_data" USING btree ("barcode");
CREATE INDEX "fsa_online_data_barcode_type_id_index" ON "public"."fsa_online_data" USING btree ("barcode_type_id");
CREATE INDEX "fsa_online_data_cert_num_index" ON "public"."fsa_online_data" USING btree ("cert_num");
CREATE INDEX "fsa_online_data_created_by_id_index" ON "public"."fsa_online_data" USING btree ("created_by_id");
CREATE INDEX "fsa_online_data_end_date_index" ON "public"."fsa_online_data" USING btree ("end_date");
CREATE INDEX "fsa_online_data_fsa_id_index" ON "public"."fsa_online_data" USING btree ("fsa_id");
CREATE INDEX "fsa_online_data_is_cert_index" ON "public"."fsa_online_data" USING btree ("is_cert");
CREATE INDEX "fsa_online_data_man_country_id_index" ON "public"."fsa_online_data" USING btree ("man_country_id");
CREATE INDEX "fsa_online_data_raw_id_index" ON "public"."fsa_online_data" USING btree ("raw_id");
CREATE INDEX "fsa_online_data_reg_num_index" ON "public"."fsa_online_data" USING btree ("reg_num");
CREATE INDEX "fsa_online_data_start_date_index" ON "public"."fsa_online_data" USING btree ("start_date");
CREATE INDEX "fsa_online_data_status_id_index" ON "public"."fsa_online_data" USING btree ("status_id");
CREATE INDEX "fsa_online_data_trade_name_index" ON "public"."fsa_online_data" USING btree ("trade_name");
CREATE INDEX "fsa_online_data_type_id_index" ON "public"."fsa_online_data" USING btree ("type_id");
-- Back-references from fsa_online to fsa_online_data. They are added with
-- ALTER TABLE because the two tables reference each other, so these
-- constraints can only be created once fsa_online_data exists.
ALTER TABLE "fsa_online" ADD CONSTRAINT "fsa_online_last_original_version_foreign" FOREIGN KEY (last_original_version) REFERENCES fsa_online_data(id) ON UPDATE CASCADE ON DELETE RESTRICT NOT DEFERRABLE;
ALTER TABLE "fsa_online" ADD CONSTRAINT "fsa_online_use_version_foreign" FOREIGN KEY (use_version) REFERENCES fsa_online_data(id) ON UPDATE CASCADE ON DELETE RESTRICT NOT DEFERRABLE;
“fsa_online”表包含大约 800 000 条记录(可能增长到 300 万条记录)
“fsa_online_data”表包含大约 350 万条记录(可能增长到 1500 万至 2500 万条记录)
我有以下查询:
-- First 10 non-deleted fsa_online rows whose effective data version has
-- unparsed_data. The effective version is use_version when it is set,
-- otherwise last_original_version. There is no ORDER BY, so which 10 rows
-- are returned is unspecified.
SELECT
    fsa_online.id,
    fsa_online.fsa_uuid,
    fsa_online.use_version,
    fsa_online.last_original_version,
    fsa_online.is_drug
FROM fsa_online
INNER JOIN fsa_online_data AS data
    ON data.id = CASE
        WHEN fsa_online.use_version IS NULL THEN fsa_online.last_original_version
        ELSE fsa_online.use_version
    END
WHERE data.unparsed_data IS NOT NULL
    AND fsa_online.deleted_at IS NULL
LIMIT 10 OFFSET 0
花费了大约 150 毫秒的时间。
但是当我需要对数据进行排序时,例如按“fsa_online_data”表的“cert_num”列排序,需要花费很长时间(大约 63000 毫秒)。
-- Same page query as the unsorted variant, but ordered by cert_num of the
-- effective data version (use_version when set, else last_original_version).
-- The sort uses the column's default collation.
SELECT
    fsa_online.id,
    fsa_online.fsa_uuid,
    fsa_online.use_version,
    fsa_online.last_original_version,
    fsa_online.is_drug
FROM fsa_online
INNER JOIN fsa_online_data AS data
    ON data.id = CASE
        WHEN fsa_online.use_version IS NULL THEN fsa_online.last_original_version
        ELSE fsa_online.use_version
    END
WHERE data.unparsed_data IS NOT NULL
    AND fsa_online.deleted_at IS NULL
ORDER BY data.cert_num
LIMIT 10 OFFSET 0
我尝试了一个小技巧:在 ORDER BY 子句中添加 COLLATE "C"(但这并不正确,因为我确实需要按 Unicode 规则比较字符串),这样查询耗时降到了大约 27500 毫秒。
以下是该查询(使用 COLLATE "C" 时)的查询计划:
"Limit (cost=1037857.75..1037857.78 rows=10 width=545)"
" Output: fsa_online.id, fsa_online.fsa_uuid, fsa_online.use_version, fsa_online.last_original_version, fsa_online.is_drug, ((data.cert_num)::character varying(255))"
" -> Sort (cost=1037857.75..1039085.55 rows=491120 width=545)"
" Output: fsa_online.id, fsa_online.fsa_uuid, fsa_online.use_version, fsa_online.last_original_version, fsa_online.is_drug, ((data.cert_num)::character varying(255))"
" Sort Key: ((data.cert_num)::character varying(255)) COLLATE "C""
" -> Hash Join (cost=972267.98..1027244.83 rows=491120 width=545)"
" Output: fsa_online.id, fsa_online.fsa_uuid, fsa_online.use_version, fsa_online.last_original_version, fsa_online.is_drug, data.cert_num"
" Inner Unique: true"
" Hash Cond: (CASE WHEN (fsa_online.use_version IS NULL) THEN fsa_online.last_original_version ELSE fsa_online.use_version END = data.id)"
" -> Seq Scan on public.fsa_online (cost=0.00..19143.06 rows=899706 width=29)"
" Output: fsa_online.id, fsa_online.fsa_uuid, fsa_online.use_version, fsa_online.last_original_version, fsa_online.is_drug"
" Filter: (fsa_online.deleted_at IS NULL)"
" -> Hash (cost=934126.84..934126.84 rows=2077451 width=17)"
" Output: data.cert_num, data.id"
" -> Seq Scan on public.fsa_online_data data (cost=0.00..934126.84 rows=2077451 width=17)"
" Output: data.cert_num, data.id"
" Filter: (data.unparsed_data IS NOT NULL)"
此外,我需要在不同的文本列上使用 LIKE 语句执行查询(全文搜索不适合这种情况,因为文本列包含任意数据,如序列号)。
-- Counts fsa_online rows that join to their effective data version
-- (use_version when set, otherwise last_original_version).
SELECT COUNT(*)
FROM fsa_online
INNER JOIN fsa_online_data AS data
    ON data.id = CASE
        WHEN fsa_online.use_version IS NULL THEN fsa_online.last_original_version
        ELSE fsa_online.use_version
    END
该查询返回的计数约为 900 000。“fsa_online_data”表的大小为 7GB。
硬件清单:
- 三星 SSD EVO 850
- 英特尔酷睿 i7 6700k
- 16GB DDR4 内存
在 PostgreSQL 版本上测试:9.6.9 和 10.5
如何提高文本列的 ORDER BY 操作的性能?我想将查询执行时间减少到 200-300 毫秒。
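连接条件中的 CASE 表达式正在严重拖慢您的查询。
您可能应该重组您的数据,以便 use_version 始终包含要使用的版本。如果不能这样做,则在 CASE 表达式上创建一个表达式索引,例如:
CREATE INDEX ON fsa_online ((CASE WHEN use_version IS NULL THEN last_original_version ELSE use_version END));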
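查看您的 CASE 表达式(CASE WHEN fsa_online.use_version IS NULL THEN fsa_online.last_original_version ELSE fsa_online.use_version END),
它应该写成 COALESCE(fsa_online.use_version, fsa_online.last_original_version)。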
您可以通过类似下面的做法让查询更快(按照惯例,去掉了多余的双引号 " 标识符引用):