复杂查询
> 文档中心 > 文档中心 > INFINI Easysearch > 功能手册 > SQL 查询 > 复杂查询

复杂查询 #

子查询 (Subquery) #

子查询 (subquery) 是一个完整的 SELECT 语句,它被用在另一个语句中,并用括号括起来。从 explain 输出中,您可以注意到一些子查询实际上被转换为等效的联接查询来执行。

示例 1:表子查询 #

SQL query:

POST /_sql
{
  "query" : """
    SELECT a1.firstname, a1.lastname, a1.balance
    FROM accounts a1
    WHERE a1.account_number IN (
      SELECT a2.account_number
      FROM accounts a2
      WHERE a2.balance > 10000
    )
    """
}

解释:

{
  "Physical Plan" : {
    "Project [ columns=[a1.balance, a1.firstname, a1.lastname] ]" : {
      "Top [ count=200 ]" : {
        "BlockHashJoin[ conditions=( a1.account_number = a2.account_number ), type=JOIN, blockSize=[FixedBlockSize with size=10000] ]" : {
          "Scroll [ accounts as a2, pageSize=10000 ]" : {
            "request" : {
              "size" : 200,
              "query" : {
                "bool" : {
                  "filter" : [
                    {
                      "bool" : {
                        "adjust_pure_negative" : true,
                        "must" : [
                          {
                            "bool" : {
                              "adjust_pure_negative" : true,
                              "must" : [
                                {
                                  "bool" : {
                                    "adjust_pure_negative" : true,
                                    "must_not" : [
                                      {
                                        "bool" : {
                                          "adjust_pure_negative" : true,
                                          "must_not" : [
                                            {
                                              "exists" : {
                                                "field" : "account_number",
                                                "boost" : 1
                                              }
                                            }
                                          ],
                                          "boost" : 1
                                        }
                                      }
                                    ],
                                    "boost" : 1
                                  }
                                },
                                {
                                  "range" : {
                                    "balance" : {
                                      "include_lower" : false,
                                      "include_upper" : true,
                                      "from" : 10000,
                                      "boost" : 1,
                                      "to" : null
                                    }
                                  }
                                }
                              ],
                              "boost" : 1
                            }
                          }
                        ],
                        "boost" : 1
                      }
                    }
                  ],
                  "adjust_pure_negative" : true,
                  "boost" : 1
                }
              },
              "from" : 0
            }
          },
          "Scroll [ accounts as a1, pageSize=10000 ]" : {
            "request" : {
              "size" : 200,
              "from" : 0,
              "_source" : {
                "excludes" : [ ],
                "includes" : [
                  "firstname",
                  "lastname",
                  "balance",
                  "account_number"
                ]
              }
            }
          },
          "useTermsFilterOptimization" : false
        }
      }
    }
  },
  "description" : "Hash Join algorithm builds hash table based on result of first query, and then probes hash table to find matched rows for each row returned by second query",
  "Logical Plan" : {
    "Project [ columns=[a1.balance, a1.firstname, a1.lastname] ]" : {
      "Top [ count=200 ]" : {
        "Join [ conditions=( a1.account_number = a2.account_number ) type=JOIN ]" : {
          "Group" : [
            {
              "Project [ columns=[a1.balance, a1.firstname, a1.lastname, a1.account_number] ]" : {
                "TableScan" : {
                  "tableAlias" : "a1",
                  "tableName" : "accounts"
                }
              }
            },
            {
              "Project [ columns=[a2.account_number] ]" : {
                "Filter [ conditions=[AND ( AND account_number ISN null, AND balance GT 10000 ) ] ]" : {
                  "TableScan" : {
                    "tableAlias" : "a2",
                    "tableName" : "accounts"
                  }
                }
              }
            }
          ]
        }
      }
    }
  }
}

结果集:

a1.firstnamea1.lastnamea1.balance
AmberDuke39225
NanetteBates32838

示例 2:FROM 子句中的子查询 #

SQL query:

POST /_sql
{
  "query" : """
    SELECT a.f, a.l, a.a
    FROM (
      SELECT firstname AS f, lastname AS l, age AS a
      FROM accounts
      WHERE age > 30
    ) AS a
    """
}

解释:

{
  "from" : 0,
  "size" : 200,
  "query" : {
    "bool" : {
      "filter" : [
        {
          "bool" : {
            "must" : [
              {
                "range" : {
                  "age" : {
                    "from" : 30,
                    "to" : null,
                    "include_lower" : false,
                    "include_upper" : true,
                    "boost" : 1.0
                  }
                }
              }
            ],
            "adjust_pure_negative" : true,
            "boost" : 1.0
          }
        }
      ],
      "adjust_pure_negative" : true,
      "boost" : 1.0
    }
  },
  "_source" : {
    "includes" : [
      "firstname",
      "lastname",
      "age"
    ],
    "excludes" : [ ]
  }
}

结果集:

fla
AmberDuke32
DaleAdams33
HattieBond36

下面是子查询中使用聚合函数和 GROUP BY 的另一个示例:

os> SELECT avg_balance FROM (
...   SELECT AVG(balance) AS avg_balance FROM accounts GROUP BY gender, age
... ) AS a;
fetched rows / total rows = 4/4
+---------------+
| avg_balance   |
|---------------|
| 32838.0       |
| 39225.0       |
| 4180.0        |
| 5686.0        |
+---------------+

多层子查询的查询也支持,以下是一个示例:

os> SELECT name FROM (
...   SELECT lastname AS name, age FROM (
...     SELECT * FROM accounts WHERE gender = 'M'
...   ) AS accounts WHERE age < 35
... ) AS accounts
fetched rows / total rows = 2/2
+--------+
| name   |
|--------|
| Duke   |
| Adams  |
+--------+

JOIN #

JOIN 子句通过使用每个索引共有的值来组合一个或多个索引的列。

语法 #

tableSource规则:

tableSource

joinPart规则:

joinPart

示例 1:内联接 (Inner Join) #

内联接是一种非常常用的联接方式,它根据指定的联接谓词,通过组合两个索引的列来创建一个新的结果集。它遍历两个索引,并比较每个文档以找到满足联接谓词的所有文档。使用 JOIN 关键字,并可以选择性地在其前面加上 INNER 关键字。联接谓词通过 ON 子句指定。

注意,对于联接查询,explain API 输出看起来很复杂。这是因为一个联接查询关联了两个底层的 Easysearch DSL 查询,并在单独的查询计划框架中执行。你可以通过查看逻辑计划和物理计划来解释它。

SQL query:

POST /_sql
{
  "query" : """
    SELECT
      a.account_number, a.firstname, a.lastname,
      e.id, e.name
    FROM accounts a
    JOIN employees_nested e
     ON a.account_number = e.id
    """
}

解释:

{
  "Physical Plan" : {
    "Project [ columns=[a.account_number, a.firstname, a.lastname, e.name, e.id] ]" : {
      "Top [ count=200 ]" : {
        "BlockHashJoin[ conditions=( a.account_number = e.id ), type=JOIN, blockSize=[FixedBlockSize with size=10000] ]" : {
          "Scroll [ employees_nested as e, pageSize=10000 ]" : {
            "request" : {
              "size" : 200,
              "from" : 0,
              "_source" : {
                "excludes" : [ ],
                "includes" : [
                  "id",
                  "name"
                ]
              }
            }
          },
          "Scroll [ accounts as a, pageSize=10000 ]" : {
            "request" : {
              "size" : 200,
              "from" : 0,
              "_source" : {
                "excludes" : [ ],
                "includes" : [
                  "account_number",
                  "firstname",
                  "lastname"
                ]
              }
            }
          },
          "useTermsFilterOptimization" : false
        }
      }
    }
  },
  "description" : "Hash Join algorithm builds hash table based on result of first query, and then probes hash table to find matched rows for each row returned by second query",
  "Logical Plan" : {
    "Project [ columns=[a.account_number, a.firstname, a.lastname, e.name, e.id] ]" : {
      "Top [ count=200 ]" : {
        "Join [ conditions=( a.account_number = e.id ) type=JOIN ]" : {
          "Group" : [
            {
              "Project [ columns=[a.account_number, a.firstname, a.lastname] ]" : {
                "TableScan" : {
                  "tableAlias" : "a",
                  "tableName" : "accounts"
                }
              }
            },
            {
              "Project [ columns=[e.name, e.id] ]" : {
                "TableScan" : {
                  "tableAlias" : "e",
                  "tableName" : "employees_nested"
                }
              }
            }
          ]
        }
      }
    }
  }
}

结果集:

a.account_numbera.firstnamea.lastnamee.ide.name
6HattieBond6Jane Smith

示例 2:交叉联接 (Cross Join) #

交叉联接或笛卡尔联接将第一个索引中的每个文档与第二个索引中的每个文档进行组合。结果集是来自两个索引的文档的笛卡尔积。它看起来像是没有使用 ON 子句来指定联接条件的内联接。

注意:即使是在两个中等大小的索引上进行交叉联接也是有风险的。这可能会触发我们的断路器终止查询,以避免内存溢出问题。

SQL query:

POST /_sql
{
  "query" : """
    SELECT
      a.account_number, a.firstname, a.lastname,
      e.id, e.name
    FROM accounts a
    JOIN employees_nested e
    """
}

结果集:

a.account_numbera.firstnamea.lastnamee.ide.name
1AmberDuke3Bob Smith
1AmberDuke4Susan Smith
1AmberDuke6Jane Smith
6HattieBond3Bob Smith
6HattieBond4Susan Smith
6HattieBond6Jane Smith
13NanetteBates3Bob Smith
13NanetteBates4Susan Smith
13NanetteBates6Jane Smith
18DaleAdams3Bob Smith
18DaleAdams4Susan Smith
18DaleAdams6Jane Smith

示例 3:外联接 (Outer Join) #

外联接用于保留一个或两个索引中的文档,尽管它们不满足联接谓词。目前,只支持 LEFT OUTER JOIN,以保留来自第一个索引的行。请注意,OUTER 关键字是可选的。

SQL query:

POST /_sql
{
  "query" : """
    SELECT
      a.account_number, a.firstname, a.lastname,
      e.id, e.name
    FROM accounts a
    LEFT JOIN employees_nested e
     ON a.account_number = e.id
    """
}

结果集:

a.account_numbera.firstnamea.lastnamee.ide.name
1AmberDuke
6HattieBond6Jane Smith
13NanetteBates
18DaleAdams