Inline Views – What is Wrong with this Quote?

18 09 2011

September 18, 2011

I do not recall putting together any articles about inline views, so let’s start out with a couple of examples before taking a look at a quote.  We need to create a table for this example, so I will reuse a slightly modified table creation script from another article, and also collect statistics with 100% sampling.

DROP TABLE T1 PURGE;

CREATE TABLE T1(
  C1 NUMBER,
  C2 NUMBER,
  C3 NUMBER,
  C4 DATE,
  C5 DATE,
  C6 DATE,
  C7 VARCHAR2(20),
  C8 VARCHAR2(20),
  C9 VARCHAR2(20),
  PRIMARY KEY(C1));

INSERT INTO
  T1
SELECT
  ROWNUM,
  DECODE(MOD(ROWNUM,5),0,NULL,ROWNUM),
  DECODE(MOD(ROWNUM,5),0,NULL,ROWNUM),
  TRUNC(SYSDATE)+TRUNC(ROWNUM/100),
  DECODE(MOD(ROWNUM,5),0,NULL,TRUNC(SYSDATE)+TRUNC(ROWNUM/100)),
  DECODE(MOD(ROWNUM,5),0,NULL,TRUNC(SYSDATE)+TRUNC(ROWNUM/100)),
  RPAD(CHR(MOD(ROWNUM-1,26)+65)||
    CHR(MOD(ROWNUM,26)+65)||
    CHR(MOD(ROWNUM+1,26)+65)||
    CHR(MOD(ROWNUM+2,26)+65),20,'A'),
  DECODE(MOD(ROWNUM,5),0,NULL,
   RPAD(CHR(MOD(ROWNUM-1,26)+65)||
    CHR(MOD(ROWNUM,26)+65)||
    CHR(MOD(ROWNUM+1,26)+65)||
    CHR(MOD(ROWNUM+2,26)+65),20,'A')),
  DECODE(MOD(ROWNUM,5),0,NULL,
   RPAD(CHR(MOD(ROWNUM-1,26)+65)||
    CHR(MOD(ROWNUM,26)+65)||
    CHR(MOD(ROWNUM+1,26)+65)||
    CHR(MOD(ROWNUM+2,26)+65),20,'A'))
FROM
  DUAL
CONNECT BY
  LEVEL<=200000;

COMMIT;

CREATE INDEX IND_T1_C2 ON T1(C2);
CREATE INDEX IND_T1_C7 ON T1(C7);

EXEC DBMS_STATS.GATHER_TABLE_STATS(OWNNAME=>USER,TABNAME=>'T1',CASCADE=>TRUE,ESTIMATE_PERCENT=>NULL)

Now, let’s create a simple SQL statement that references the above table 3 times, twice in inline views and once again outside of an inline view:

SET AUTOTRACE TRACEONLY EXPLAIN

SELECT
  T1.C1,
  T1.C2
FROM
  T1,
  (SELECT
     C1,
     C2,
     C3,
     C4
   FROM
     T1
   WHERE
     C1 BETWEEN 1 AND 10000) V1,
  (SELECT
     C1,
     C6,
     C7,
     C8
   FROM
     T1
   WHERE
     C1 BETWEEN 9000 AND 12000) V2
WHERE
  T1.C1=V1.C1
  AND T1.C1=V2.C1;

If I execute the above on Oracle Database 11.2.0.2 I see the following autotrace generated execution plan:

Execution Plan
----------------------------------------------------------
Plan hash value: 3832506612

--------------------------------------------------------------------------------------------
| Id  | Operation                   | Name         | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT            |              |  1002 | 10020 |    17   (0)| 00:00:22 |
|   1 |  TABLE ACCESS BY INDEX ROWID| T1           |  1002 | 10020 |    17   (0)| 00:00:22 |
|*  2 |   INDEX RANGE SCAN          | SYS_C0039297 |  1002 |       |     3   (0)| 00:00:04 |
--------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------
   2 - access("T1"."C1">=9000 AND "T1"."C1"<=10000)

Notice in my query I did not reference any of the columns in view V1 or view V2 – the table T1 appears only a single time in the execution plan.  Let’s fix the query and try again:

SELECT
  T1.C1,
  T1.C2,
  V1.C3,
  V1.C4,
  V2.C6,
  V2.C7,
  V2.C8
FROM
  T1,
  (SELECT
     C1,
     C2,
     C3,
     C4
   FROM
     T1
   WHERE
     C1 BETWEEN 1 AND 10000) V1,
  (SELECT
     C1,
     C6,
     C7,
     C8
   FROM
     T1
   WHERE
     C1 BETWEEN 9000 AND 12000) V2
WHERE
  T1.C1=V1.C1
  AND T1.C1=V2.C1;

Execution Plan
----------------------------------------------------------
Plan hash value: 3832506612

--------------------------------------------------------------------------------------------
| Id  | Operation                   | Name         | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT            |              |  1002 | 68136 |    17   (0)| 00:00:22 |
|   1 |  TABLE ACCESS BY INDEX ROWID| T1           |  1002 | 68136 |    17   (0)| 00:00:22 |
|*  2 |   INDEX RANGE SCAN          | SYS_C0039297 |  1002 |       |     3   (0)| 00:00:04 |
--------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------
   2 - access("T1"."C1">=9000 AND "T1"."C1"<=10000)

The above was the desired result.

Let’s create a second table that is identical to the first table:

DROP TABLE T2 PURGE;

CREATE TABLE T2(
  C1 NUMBER,
  C2 NUMBER,
  C3 NUMBER,
  C4 DATE,
  C5 DATE,
  C6 DATE,
  C7 VARCHAR2(20),
  C8 VARCHAR2(20),
  C9 VARCHAR2(20),
  PRIMARY KEY(C1));

INSERT INTO
  T2
SELECT
  ROWNUM,
  DECODE(MOD(ROWNUM,5),0,NULL,ROWNUM),
  DECODE(MOD(ROWNUM,5),0,NULL,ROWNUM),
  TRUNC(SYSDATE)+TRUNC(ROWNUM/100),
  DECODE(MOD(ROWNUM,5),0,NULL,TRUNC(SYSDATE)+TRUNC(ROWNUM/100)),
  DECODE(MOD(ROWNUM,5),0,NULL,TRUNC(SYSDATE)+TRUNC(ROWNUM/100)),
  RPAD(CHR(MOD(ROWNUM-1,26)+65)||
    CHR(MOD(ROWNUM,26)+65)||
    CHR(MOD(ROWNUM+1,26)+65)||
    CHR(MOD(ROWNUM+2,26)+65),20,'A'),
  DECODE(MOD(ROWNUM,5),0,NULL,
   RPAD(CHR(MOD(ROWNUM-1,26)+65)||
    CHR(MOD(ROWNUM,26)+65)||
    CHR(MOD(ROWNUM+1,26)+65)||
    CHR(MOD(ROWNUM+2,26)+65),20,'A')),
  DECODE(MOD(ROWNUM,5),0,NULL,
   RPAD(CHR(MOD(ROWNUM-1,26)+65)||
    CHR(MOD(ROWNUM,26)+65)||
    CHR(MOD(ROWNUM+1,26)+65)||
    CHR(MOD(ROWNUM+2,26)+65),20,'A'))
FROM
  DUAL
CONNECT BY
  LEVEL<=200000;

COMMIT;

CREATE INDEX IND_T2_C2 ON T2(C2);
CREATE INDEX IND_T2_C7 ON T2(C7);

EXEC DBMS_STATS.GATHER_TABLE_STATS(OWNNAME=>USER,TABNAME=>'T2',CASCADE=>TRUE,ESTIMATE_PERCENT=>NULL)

Let’s modify the last query to access the table T2 in the second inline view and take a look at the autotrace generated execution plan for the query:

SET AUTOTRACE TRACEONLY EXPLAIN

SELECT
  T1.C1,
  T1.C2,
  V1.C3,
  V1.C4,
  V2.C6,
  V2.C7,
  V2.C8
FROM
  T1,
  (SELECT
     C1,
     C2,
     C3,
     C4
   FROM
     T1
   WHERE
     C1 BETWEEN 1 AND 10000) V1,
  (SELECT
     C1,
     C6,
     C7,
     C8
   FROM
     T2
   WHERE
     C1 BETWEEN 9000 AND 12000) V2
WHERE
  T1.C1=V1.C1
  AND T1.C1=V2.C1;

Execution Plan
----------------------------------------------------------
Plan hash value: 2810660667

---------------------------------------------------------------------------------------------
| Id  | Operation                    | Name         | Rows  | Bytes | Cost (%CPU)| Time     |
---------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT             |              |  1001 | 73073 |    35   (3)| 00:00:44 |
|*  1 |  HASH JOIN                   |              |  1001 | 73073 |    35   (3)| 00:00:44 |
|   2 |   TABLE ACCESS BY INDEX ROWID| T1           |  1002 | 23046 |    17   (0)| 00:00:22 |
|*  3 |    INDEX RANGE SCAN          | SYS_C0039297 |  1002 |       |     3   (0)| 00:00:04 |
|   4 |   TABLE ACCESS BY INDEX ROWID| T2           |  1002 | 50100 |    17   (0)| 00:00:22 |
|*  5 |    INDEX RANGE SCAN          | SYS_C0039300 |  1002 |       |     3   (0)| 00:00:04 |
---------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------
   1 - access("T1"."C1"="C1")
   3 - access("T1"."C1">=9000 AND "T1"."C1"<=10000)
   5 - access("C1">=9000 AND "C1"<=10000)

If you do not like inline views you can instead take advantage of subquery factoring (WITH blocks):

WITH
  V1 AS (SELECT
     C1,
     C2,
     C3,
     C4
   FROM
     T1
   WHERE
     C1 BETWEEN 1 AND 10000),
  V2 AS (SELECT
     C1,
     C6,
     C7,
     C8
   FROM
     T2
   WHERE
     C1 BETWEEN 9000 AND 12000)
SELECT
  T1.C1,
  T1.C2,
  V1.C3,
  V1.C4,
  V2.C6,
  V2.C7,
  V2.C8
FROM
  T1,
  V1,
  V2
WHERE
  T1.C1=V1.C1
  AND T1.C1=V2.C1;

Execution Plan
----------------------------------------------------------
Plan hash value: 2810660667

---------------------------------------------------------------------------------------------
| Id  | Operation                    | Name         | Rows  | Bytes | Cost (%CPU)| Time     |
---------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT             |              |  1001 | 73073 |    35   (3)| 00:00:44 |
|*  1 |  HASH JOIN                   |              |  1001 | 73073 |    35   (3)| 00:00:44 |
|   2 |   TABLE ACCESS BY INDEX ROWID| T1           |  1002 | 23046 |    17   (0)| 00:00:22 |
|*  3 |    INDEX RANGE SCAN          | SYS_C0039297 |  1002 |       |     3   (0)| 00:00:04 |
|   4 |   TABLE ACCESS BY INDEX ROWID| T2           |  1002 | 50100 |    17   (0)| 00:00:22 |
|*  5 |    INDEX RANGE SCAN          | SYS_C0039300 |  1002 |       |     3   (0)| 00:00:04 |
---------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------
   1 - access("T1"."C1"="C1")
   3 - access("T1"."C1">=9000 AND "T1"."C1"<=10000)
   5 - access("C1">=9000 AND "C1"<=10000)

Hints may be used to change the optimization behavior of the query optimizer.  For instance, if we specify the NO_QUERY_TRANSFORMATION hint in the last of the above queries that used inline views, the autotrace generated execution plan appears as follows:

SELECT /*+ NO_QUERY_TRANSFORMATION */
  T1.C1,
  T1.C2,
  V1.C3,
  V1.C4,
  V2.C6,
  V2.C7,
  V2.C8
FROM
  T1,
  (SELECT
     C1,
     C2,
     C3,
     C4
   FROM
     T1
   WHERE
     C1 BETWEEN 1 AND 10000) V1,
  (SELECT
     C1,
     C6,
     C7,
     C8
   FROM
     T2
   WHERE
     C1 BETWEEN 9000 AND 12000) V2
WHERE
  T1.C1=V1.C1
  AND T1.C1=V2.C1;

Execution Plan
----------------------------------------------------------
Plan hash value: 3681900949

------------------------------------------------------------------------------------------------
| Id  | Operation                       | Name         | Rows  | Bytes | Cost (%CPU)| Time     |
------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                |              |  1002 | 91182 |  1037   (1)| 00:21:33 |
|*  1 |  HASH JOIN                      |              |  1002 | 91182 |  1037   (1)| 00:21:33 |
|   2 |   NESTED LOOPS                  |              |       |       |            |          |
|   3 |    NESTED LOOPS                 |              |  1002 | 45090 |  1019   (0)| 00:21:11 |
|   4 |     VIEW                        |              |  1002 | 35070 |    17   (0)| 00:00:22 |
|   5 |      TABLE ACCESS BY INDEX ROWID| T1           |  1002 | 18036 |    17   (0)| 00:00:22 |
|*  6 |       INDEX RANGE SCAN          | SYS_C0039297 |  1002 |       |     3   (0)| 00:00:04 |
|*  7 |     INDEX UNIQUE SCAN           | SYS_C0039297 |     1 |       |     0   (0)| 00:00:01 |
|   8 |    TABLE ACCESS BY INDEX ROWID  | T1           |     1 |    10 |     1   (0)| 00:00:02 |
|   9 |   VIEW                          |              |  1002 | 46092 |    17   (0)| 00:00:22 |
|  10 |    TABLE ACCESS BY INDEX ROWID  | T2           |  1002 | 50100 |    17   (0)| 00:00:22 |
|* 11 |     INDEX RANGE SCAN            | SYS_C0039300 |  1002 |       |     3   (0)| 00:00:04 |
------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------
   1 - access("T1"."C1"="V2"."C1")
   6 - access("C1">=9000 AND "C1"<=10000)
   7 - access("T1"."C1"="V1"."C1")
  11 - access("C1">=9000 AND "C1"<=10000)

For fun, on the Enterprise Edition of Oracle Database (if I remember correctly, the Standard Edition will behave differentlyEdit Sept. 18, 2011: just tested 11.2.0.2 Standard Edition and 10.2.0.5 Standard Edition – both versions generated execution plans similar to what is displayed below, so the MATERIALIZE hint apparently does not behave differently as originally described) we can also use the MATERIALIZE hint using the subquery factored (WITH block) version of the query:

WITH
  V1 AS (SELECT /*+ MATERIALIZE */
     C1,
     C2,
     C3,
     C4
   FROM
     T1
   WHERE
     C1 BETWEEN 1 AND 10000),
  V2 AS (SELECT /*+ MATERIALIZE */
     C1,
     C6,
     C7,
     C8
   FROM
     T2
   WHERE
     C1 BETWEEN 9000 AND 12000)
SELECT
  T1.C1,
  T1.C2,
  V1.C3,
  V1.C4,
  V2.C6,
  V2.C7,
  V2.C8
FROM
  T1,
  V1,
  V2
WHERE
  T1.C1=V1.C1
  AND T1.C1=V2.C1;

Execution Plan
----------------------------------------------------------
Plan hash value: 1193025797

-----------------------------------------------------------------------------------------------------------
| Id  | Operation                     | Name                      | Rows  | Bytes | Cost (%CPU)| Time     |
-----------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT              |                           |   150 | 13650 |  1491   (1)| 00:31:00 |
|   1 |  TEMP TABLE TRANSFORMATION    |                           |       |       |            |          |
|   2 |   LOAD AS SELECT              | SYS_TEMP_0FD9D6625_D08144 |       |       |            |          |
|   3 |    TABLE ACCESS BY INDEX ROWID| T1                        | 10000 |   224K|   151   (0)| 00:03:09 |
|*  4 |     INDEX RANGE SCAN          | SYS_C0039297              | 10000 |       |    20   (0)| 00:00:25 |
|   5 |   LOAD AS SELECT              | SYS_TEMP_0FD9D6626_D08144 |       |       |            |          |
|   6 |    TABLE ACCESS BY INDEX ROWID| T2                        |  3002 |   146K|    47   (0)| 00:00:59 |
|*  7 |     INDEX RANGE SCAN          | SYS_C0039300              |  3002 |       |     7   (0)| 00:00:09 |
|*  8 |   HASH JOIN                   |                           |   150 | 13650 |  1293   (1)| 00:26:53 |
|*  9 |    HASH JOIN                  |                           |  3002 |   164K|  1276   (1)| 00:26:31 |
|  10 |     VIEW                      |                           |  3002 |   134K|    12   (0)| 00:00:15 |
|  11 |      TABLE ACCESS FULL        | SYS_TEMP_0FD9D6626_D08144 |  3002 |   146K|    12   (0)| 00:00:15 |
|  12 |     TABLE ACCESS FULL         | T1                        |   200K|  1953K|  1263   (0)| 00:26:16 |
|  13 |    VIEW                       |                           | 10000 |   341K|    17   (0)| 00:00:22 |
|  14 |     TABLE ACCESS FULL         | SYS_TEMP_0FD9D6625_D08144 | 10000 |   224K|    17   (0)| 00:00:22 |
-----------------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------
   4 - access("C1">=1 AND "C1"<=10000)
   7 - access("C1">=9000 AND "C1"<=12000)
   8 - access("T1"."C1"="V1"."C1")
   9 - access("T1"."C1"="V2"."C1")

Ouch, an estimated 31 minutes to complete?

SET AUTOTRACE OFF

SELECT
  PNAME,
  PVAL1
FROM
  SYS.AUX_STATS$
WHERE
  PNAME IN ('SREADTIM','MREADTIM');

PNAME         PVAL1
-------- ----------
SREADTIM   1247.132
MREADTIM   3575.567

Must be that darn floppy RAID 10 array again.  Let’s make it faster:

EXEC DBMS_STATS.SET_SYSTEM_STATS('SREADTIM',0.4)
EXEC DBMS_STATS.SET_SYSTEM_STATS('MREADTIM',0.8)

SET AUTOTRACE TRACEONLY EXPLAIN

WITH
  V1 AS (SELECT /*+ MATERIALIZE */
     C1,
     C2,
     C3,
     C4
   FROM
     T1
   WHERE
     C1 BETWEEN 1 AND 10000),
  V2 AS (SELECT /*+ MATERIALIZE */
     C1,
     C6,
     C7,
     C8
   FROM
     T2
   WHERE
     C1 BETWEEN 9000 AND 12000)
SELECT
  T1.C1,
  T1.C2,
  V1.C3,
  V1.C4,
  V2.C6,
  V2.C7,
  V2.C8
FROM
  T1,
  V1,
  V2
WHERE
  T1.C1=V1.C1
  AND T1.C1=V2.C1;

Execution Plan
----------------------------------------------------------
Plan hash value: 1626848331

-----------------------------------------------------------------------------------------------------------
| Id  | Operation                     | Name                      | Rows  | Bytes | Cost (%CPU)| Time     |
-----------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT              |                           |   150 | 13650 |  1211  (10)| 00:00:01 |
|   1 |  TEMP TABLE TRANSFORMATION    |                           |       |       |            |          |
|   2 |   LOAD AS SELECT              | SYS_TEMP_0FD9D6627_D08144 |       |       |            |          |
|   3 |    TABLE ACCESS BY INDEX ROWID| T1                        | 10000 |   224K|   158   (5)| 00:00:01 |
|*  4 |     INDEX RANGE SCAN          | SYS_C0039297              | 10000 |       |    23  (14)| 00:00:01 |
|   5 |   LOAD AS SELECT              | SYS_TEMP_0FD9D6628_D08144 |       |       |            |          |
|   6 |    TABLE ACCESS BY INDEX ROWID| T2                        |  3002 |   146K|    49   (5)| 00:00:01 |
|*  7 |     INDEX RANGE SCAN          | SYS_C0039300              |  3002 |       |     8  (13)| 00:00:01 |
|*  8 |   HASH JOIN                   |                           |   150 | 13650 |  1004  (11)| 00:00:01 |
|*  9 |    HASH JOIN                  |                           |  3002 |   164K|   987  (10)| 00:00:01 |
|  10 |     VIEW                      |                           |  3002 |   134K|    10  (10)| 00:00:01 |
|  11 |      TABLE ACCESS FULL        | SYS_TEMP_0FD9D6628_D08144 |  3002 |   146K|    10  (10)| 00:00:01 |
|  12 |     TABLE ACCESS FULL         | T1                        |   200K|  1953K|   950   (8)| 00:00:01 |
|  13 |    VIEW                       |                           | 10000 |   341K|    15  (20)| 00:00:01 |
|  14 |     TABLE ACCESS FULL         | SYS_TEMP_0FD9D6627_D08144 | 10000 |   224K|    15  (20)| 00:00:01 |
-----------------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------
   4 - access("C1">=1 AND "C1"<=10000)
   7 - access("C1">=9000 AND "C1"<=12000)
   8 - access("T1"."C1"="V1"."C1")
   9 - access("T1"."C1"="V2"."C1")

A one second estimated completion time, that is better.  Bugs…

Now that hopefully everyone is on the same page about what an inline view is and how to use one in a query, let’s take a look at two quotes about inline views from the book Oracle 11g Performance Tuning Recipes:

Page 291 (click the link to see the quote in full context):

“While extremely useful, if misused or overused, inline views can cause database performance issues, especially in terms of the use of the temporary tablespace. Since inline views are created and used only for the duration of a query, their results are held in the program global memory area, and if too large, the temporary tablespace.”

Page 292 (note that this quote is quite a bit longer than I would prefer, but it is as short as possible without losing the context of the quote):

“In this query, there are three inline views created: the SERVICE_INFO view, the PRODUCT_INFO view, and the BILLING_INFO view. Each of these queries will be processed and the results stored in the program global area or the temporary tablespace before finally processing the true end-user query, which starts with the final SELECT statement shown in the query.  In examples such as these, it is generally more efficient at the database level to create tables that hold the data defined by the inline views—in this case, three separate tables. Then, the final query can be extracted from joining the three permanent tables to generate the results.”

What, if anything, is wrong with the above quote from the book?

The point of blog articles like this one is not to insult authors who have spent thousands of hours carefully constructing an accurate and helpful book, but instead to suggest that readers investigate when something stated does not exactly match what one believes to be true.  It could be that the author “took a long walk down a short pier”, or that the author is revealing accurate information which simply cannot be found through other resources (and may in the process be directly contradicting information sources you have used in the past).  If you do not investigate in such cases, you may lose an important opportunity to learn something that could prove to be extremely valuable.


Actions

Information

7 responses

18 09 2011
Gary

Simply put, the optimizer can do any sort of ‘magic’ with an inline view that it could with a conventional view. It may merge the view and never ‘crystalize’ the data in it. While an inline view misses the reuse benefit of a conventional view, it also make the complexity of a query apparent in a way that may be hidden with a conventional view.

Personally I like inline views as they make the logic more obvious, for example keeping filter conditions close to the table being filtered.

18 09 2011
Dom Brooks

It’s a bit worrying that the author/s seem to believe that inline views are “created”, even if we presume they mean materialised – and I’m not sure that this is what they mean.

Have they never heard for example of the whole query transformation process and view merging?

Regarding the second quote, a lot of effort is spent on the various forums fighting developer intentions to create working/temporary tables with interim data sets. This is a practice normally used by developers who do not have a strong Oracle background and more common with Sql Server and Sybase. In complete contrast to this second quote, the most efficient practice in Oracle is usually to write a single sql statement, inline views or not.

Furthermore, the quote at the bottom of page 292 that “large inline views can easily consume a large amount of temporary tablespace” is misleading. You could read it as meaning the length of the inline view query text. However it is consuming large amounts of data notably with insufficiently sized work areas which is a primary cause of queries having “problem” temporary tablespace usage and I would suggest that that is as likely with any sql statement.

On the plus side, it is true that materialising subqueries or views should be done with care and it’s wise to point out that there can be issues with views – notably with predicate pushing or lack of and not merging etc – and particularly because there are certain optimisations that don’t seem to happen with the WITH clause, etc. E.g. http://jonathanlewis.wordpress.com/category/oracle/tuning/subquery-factoring/

19 09 2011
Charles Hooper

#2, #3, and #4 in that series seem to apply. One of the articles also linked to the following article that was written by Randolf Geist:

http://oracle-randolf.blogspot.com/2008/01/subquery-factoring-and-cost-based-query.html

(more comments later)

18 09 2011
Charles Hooper

I will try to keep my comments in this blog article to a minimum for the next 24 hours, at which point I plan to post my review comments for recipe 8-15 “Creating Efficient Temporary Views”. Needless to say, I was very surprised when I read this recipe’s contents in a performance tuning book. The general tone of my review comments do not disagree with the two comments currently attached to this blog article.

I think that more could still be mentioned about what is wrong; how the material can be restated to help someone trying to learn from this book; when views should and should not be used (statically defined and otherwise); the difference between inline views, subquery factoring, statically defined views. and materialized views; and etc.

(Side note: checking the Google Books link as Dom Brooks apparently did was a good idea.)

19 09 2011
Charles Hooper

Gary’s response in this blog article is very good and manages to drive directly to the point – and is probably a better answer using 100 words or less than what I could have put together using 100 words or less.

Dom Brooks’ answer not only examines the quoted section, but also the book page contents as a whole. He raised a valid point that developers posting to forums have historically had difficulty with either thinking using set based logic (wanting to do things slow-by-slow) or not attempting silly things on an Oracle platform such as creating intermediate result tables (either temporary or permanent) on the fly – as is likely typical in a SQL Server environment. It hurts the cause of educating developers when an Oracle performance book essentially tells not only the developers but also the DBAs to go ahead and create those tables because doing so will help improve performance.

As promised, below is the portion of my book review for this particular recipe (I may slightly adjust the wording before finalizing the second half of the book review – the review of this recipe was written before I posted this blog article). I missed some of the finer points mentioned by Gary and Dom Brooks, but also included a couple of points that were not covered in their comments. The book review is currently just over 29 type-written pages long for the first 10 chapters.

Recipe 8-15 “Creating Efficient Temporary Views” shows how to create inline views, and provides performance warnings about their use. The authors of the book make several critical mistakes in this recipe, giving the readers the impression that the rows requested by inline views are always materialized into tables, and as such should be avoided whenever possible. The SQL statement found in the How it Works section uses subquery factoring (WITH blocks) rather than inline views. Page 292 states about the query found in the How it Works section, “In examples such as these, it is generally more efficient at the database level to create tables that hold the data defined by the inline views—in this case, three separate tables. Then, the final query can be extracted from joining the three permanent tables to generate the results.” The recipe also contains the following cautionary note, “Large inline views can easily consume a large amount of temporary tablespace.” There is more wrong than right in this recipe. Oracle Database does not materialized every inline view, creating a temp table such as SYS_TEMP_0FD9D6628_D08144, as it processes the SQL statement. The optimizer is free to materialize an inline view into a temp table, but it is also free to push predicates into the inline view, and completely rewrite the original SQL statement so that it no longer contains an inline view. The book’s suggested alternative of storing the intermediate results of inline views in permanent tables, unnecessarily generating redo and undo in the process, is very strange (if we must head in this direction, why not suggest using materialized views). The original SQL statement with the inline views (the example SQL statement in the recipe shows factored subqueries – WITH blocks) is able to take advantage of not only pushed predicates from the parent query (and other inline views) to further restrict the rows requested by the inline view, but also to take advantage of any indexes and partitioning that may be present in the parent tables. Additionally, when troubleshooting the performance or accuracy of a SQL statement it is helpful to have the full SQL text that generates the resultset in a single SQL statement (this is an advantage of inline views over both statistically defined views and the approach suggested by the book). (pages 290-292)

13 07 2012
Paul

I had a similar reaction to this recipe. i understand the points made above. Still, I thought, why not do some tests?

connect perfstat/password@s

select count(*) from 
( select /*+ no_merge */  * from dba_hist_sqlstat) a,
dba_hist_sql_plan b
where a.sql_id = b.sql_id
/
exec bcf_snapsess.snapself('xtemp2.sql inline view');

connect perfstat/pssword@s
select count(*) from 
dba_hist_sqlstat a,
dba_hist_sql_plan b
where a.sql_id = b.sql_id
/
exec bcf_snapsess.snapself('xtemp2.sql NO inline view');

The results of the queries – 3.3 million rows in the join of the two tables.

The calls to bcf_snapsess record v$sesstat and v$session_event and a few other things for the given session. Comparing that information from the two test sessions/queries, I found that in this case the session that used the inline view had “session pga%” statistics about 10% greater than without the inline view, not a large increase, but it is an increase. I had hoped to be able to compare the direct path write temp usage however neither session had to wait on that event. I’m tempted to run some tests against larger data sets, or tweak this one to fetch more than count(*)..

16 07 2012
Charles Hooper

Paul,

Sorry about the delay in responding. I am curious to know if you are reading the book (or have already read it) – the book seems to have an overall high rating on Amazon.com, with only one review (mine) mentioning that there are errors in the book.

What partially complicates matters with your test case is the use of the DBA_HIST_SQL_PLAN and the DBA_HIST_SQLSTAT views… which of course are views on tables that probably do not have up to date statistics. The NO_MERGE hint could be sufficient to change the execution plan from one that used a nested loops join, to one that uses a hash join – I suspect that change could explain the increase in the PGA usage when the NO_MERGE hint was used.

Here is a small modification that I made to your test case. I first specified the query with the inline view with the NO_MERGE hint, then removed the NO_MERGE hint, then executed your SQL statement without the inline view. If the two quotes from the book are correct, the execution plans for the first and second queries should have similar execution plans, and the last query should have a different execution plan:

SET LINESIZE 140
SET PAGESIZE 1000
 
select /*+ GATHER_PLAN_STATISTICS */ count(*) from 
( select /*+ no_merge */  * from dba_hist_sqlstat) a,
dba_hist_sql_plan b
where a.sql_id = b.sql_id;
 
SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY_CURSOR(NULL,NULL,'ALLSTATS LAST'));
 
select /*+ GATHER_PLAN_STATISTICS */ count(*) from 
( select * from dba_hist_sqlstat) a,
dba_hist_sql_plan b
where a.sql_id = b.sql_id;
 
SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY_CURSOR(NULL,NULL,'ALLSTATS LAST'));
 
select /*+ GATHER_PLAN_STATISTICS */ count(*) from 
dba_hist_sqlstat a,
dba_hist_sql_plan b
where a.sql_id = b.sql_id;
 
SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY_CURSOR(NULL,NULL,'ALLSTATS LAST'));

Here is the execution plan for the first query (using the sample database from Oracle Corporation, freshly started about 15 minutes ago) – notice that the plan hash value is 1978021653:

SQL_ID  6nm72h1z4k2qg, child number 0
-------------------------------------
select /*+ GATHER_PLAN_STATISTICS */ count(*) from ( select /*+
no_merge */  * from dba_hist_sqlstat) a, dba_hist_sql_plan b where
a.sql_id = b.sql_id
 
Plan hash value: 1978021653
 
--------------------------------------------------------------------------------------------------------------
| Id  | Operation                       | Name             | Starts | E-Rows | A-Rows |   A-Time   | Buffers |
--------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                |                  |      1 |        |      1 |00:00:00.04 |    6069 |
|   1 |  SORT AGGREGATE                 |                  |      1 |      1 |      1 |00:00:00.04 |    6069 |
|   2 |   NESTED LOOPS                  |                  |      1 |      1 |   4774 |00:00:00.10 |    6069 |
|   3 |    VIEW                         |                  |      1 |      1 |    288 |00:00:00.01 |     298 |
|   4 |     NESTED LOOPS                |                  |      1 |        |    288 |00:00:00.01 |     298 |
|   5 |      NESTED LOOPS               |                  |      1 |      1 |    288 |00:00:00.01 |      10 |
|   6 |       PARTITION RANGE ALL       |                  |      1 |      1 |    288 |00:00:00.01 |       6 |
|   7 |        INDEX FULL SCAN          | WRH$_SQLSTAT_PK  |      2 |      1 |    288 |00:00:00.01 |       6 |
|*  8 |       INDEX UNIQUE SCAN         | WRM$_SNAPSHOT_PK |    288 |      1 |    288 |00:00:00.01 |       4 |
|*  9 |      TABLE ACCESS BY INDEX ROWID| WRM$_SNAPSHOT    |    288 |      1 |    288 |00:00:00.01 |     288 |
|* 10 |    INDEX FULL SCAN              | WRH$_SQL_PLAN_PK |    288 |      1 |   4774 |00:00:00.02 |    5771 |
--------------------------------------------------------------------------------------------------------------
 
Predicate Information (identified by operation id):
---------------------------------------------------
   8 - access("SN"."DBID"="SQL"."DBID" AND "SN"."SNAP_ID"="SQL"."SNAP_ID" AND
              "SN"."INSTANCE_NUMBER"="SQL"."INSTANCE_NUMBER")
   9 - filter("SN"."STATUS"=0)
  10 - access("A"."SQL_ID"="SQL_ID")
       filter("A"."SQL_ID"="SQL_ID")

Here is the execution plan for the second query – notice that the plan hash value is 2653281973 and the word “VIEW” does not appear anywhere in the execution plan:

SQL_ID  6yd5fx95gay7s, child number 0
-------------------------------------
select /*+ GATHER_PLAN_STATISTICS */ count(*) from ( select * from
dba_hist_sqlstat) a, dba_hist_sql_plan b where a.sql_id = b.sql_id
 
Plan hash value: 2653281973
 
------------------------------------------------------------------------------------------------------------
| Id  | Operation                     | Name             | Starts | E-Rows | A-Rows |   A-Time   | Buffers |
------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT              |                  |      1 |        |      1 |00:00:00.05 |   10551 |
|   1 |  SORT AGGREGATE               |                  |      1 |      1 |      1 |00:00:00.05 |   10551 |
|   2 |   NESTED LOOPS                |                  |      1 |        |   4774 |00:00:00.12 |   10551 |
|   3 |    NESTED LOOPS               |                  |      1 |      1 |   4774 |00:00:00.11 |    5777 |
|   4 |     NESTED LOOPS              |                  |      1 |      1 |   4774 |00:00:00.11 |    5773 |
|   5 |      PARTITION RANGE ALL      |                  |      1 |      1 |    288 |00:00:00.01 |       5 |
|   6 |       INDEX FULL SCAN         | WRH$_SQLSTAT_PK  |      2 |      1 |    288 |00:00:00.01 |       5 |
|*  7 |      INDEX FULL SCAN          | WRH$_SQL_PLAN_PK |    288 |      1 |   4774 |00:00:00.03 |    5768 |
|*  8 |     INDEX UNIQUE SCAN         | WRM$_SNAPSHOT_PK |   4774 |      1 |   4774 |00:00:00.01 |       4 |
|*  9 |    TABLE ACCESS BY INDEX ROWID| WRM$_SNAPSHOT    |   4774 |      1 |   4774 |00:00:00.01 |    4774 |
------------------------------------------------------------------------------------------------------------
  
Predicate Information (identified by operation id):
---------------------------------------------------
   7 - access("SQL_ID"="SQL_ID")
       filter("SQL_ID"="SQL_ID")
   8 - access("SN"."DBID"="SQL"."DBID" AND "SN"."SNAP_ID"="SQL"."SNAP_ID" AND
              "SN"."INSTANCE_NUMBER"="SQL"."INSTANCE_NUMBER")
   9 - filter("SN"."STATUS"=0)

Here is the execution plan for the third query – notice that the plan hash value is 2653281973, the same as the second query, and that the word “VIEW” does not appear anywhere in the execution plan even though the query is selecting from two views:

SQL_ID  8xu1f11zugfyf, child number 0
-------------------------------------
select /*+ GATHER_PLAN_STATISTICS */ count(*) from dba_hist_sqlstat a,
dba_hist_sql_plan b where a.sql_id = b.sql_id
 
Plan hash value: 2653281973
 
------------------------------------------------------------------------------------------------------------
| Id  | Operation                     | Name             | Starts | E-Rows | A-Rows |   A-Time   | Buffers |
------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT              |                  |      1 |        |      1 |00:00:00.05 |   10551 |
|   1 |  SORT AGGREGATE               |                  |      1 |      1 |      1 |00:00:00.05 |   10551 |
|   2 |   NESTED LOOPS                |                  |      1 |        |   4774 |00:00:00.12 |   10551 |
|   3 |    NESTED LOOPS               |                  |      1 |      1 |   4774 |00:00:00.11 |    5777 |
|   4 |     NESTED LOOPS              |                  |      1 |      1 |   4774 |00:00:00.10 |    5773 |
|   5 |      PARTITION RANGE ALL      |                  |      1 |      1 |    288 |00:00:00.01 |       5 |
|   6 |       INDEX FULL SCAN         | WRH$_SQLSTAT_PK  |      2 |      1 |    288 |00:00:00.01 |       5 |
|*  7 |      INDEX FULL SCAN          | WRH$_SQL_PLAN_PK |    288 |      1 |   4774 |00:00:00.02 |    5768 |
|*  8 |     INDEX UNIQUE SCAN         | WRM$_SNAPSHOT_PK |   4774 |      1 |   4774 |00:00:00.01 |       4 |
|*  9 |    TABLE ACCESS BY INDEX ROWID| WRM$_SNAPSHOT    |   4774 |      1 |   4774 |00:00:00.01 |    4774 |
------------------------------------------------------------------------------------------------------------
 
Predicate Information (identified by operation id):
---------------------------------------------------
   7 - access("SQL_ID"="SQL_ID")
       filter("SQL_ID"="SQL_ID")
   8 - access("SN"."DBID"="SQL"."DBID" AND "SN"."SNAP_ID"="SQL"."SNAP_ID" AND
              "SN"."INSTANCE_NUMBER"="SQL"."INSTANCE_NUMBER")
   9 - filter("SN"."STATUS"=0)

Maybe a test case using regular tables would be a little easier for everyone to follow. Maybe the following tables could be used as a starting point:

CREATE TABLE T1(
  C1 NUMBER,
  C2 NUMBER,
  C3 NUMBER,
  C4 DATE,
  C5 DATE,
  C6 DATE,
  C7 VARCHAR2(20),
  C8 VARCHAR2(20),
  C9 VARCHAR2(20),
  PRIMARY KEY(C1));
 
INSERT INTO
  T1
SELECT
  ROWNUM,
  DECODE(MOD(ROWNUM,5),0,NULL,ROWNUM),
  DECODE(MOD(ROWNUM,5),0,NULL,ROWNUM),
  TRUNC(SYSDATE)+TRUNC(ROWNUM/100),
  DECODE(MOD(ROWNUM,5),0,NULL,TRUNC(SYSDATE)+TRUNC(ROWNUM/100)),
  DECODE(MOD(ROWNUM,5),0,NULL,TRUNC(SYSDATE)+TRUNC(ROWNUM/100)),
  RPAD(CHR(MOD(ROWNUM-1,26)+65)||
    CHR(MOD(ROWNUM,26)+65)||
    CHR(MOD(ROWNUM+1,26)+65)||
    CHR(MOD(ROWNUM+2,26)+65),20,'A'),
  DECODE(MOD(ROWNUM,5),0,NULL,
   RPAD(CHR(MOD(ROWNUM-1,26)+65)||
    CHR(MOD(ROWNUM,26)+65)||
    CHR(MOD(ROWNUM+1,26)+65)||
    CHR(MOD(ROWNUM+2,26)+65),20,'A')),
  DECODE(MOD(ROWNUM,5),0,NULL,
   RPAD(CHR(MOD(ROWNUM-1,26)+65)||
    CHR(MOD(ROWNUM,26)+65)||
    CHR(MOD(ROWNUM+1,26)+65)||
    CHR(MOD(ROWNUM+2,26)+65),20,'A'))
FROM
  DUAL
CONNECT BY
  LEVEL<=1000000;
 
COMMIT;
 
CREATE INDEX IND_T1_C2 ON T1(C2);
CREATE INDEX IND_T1_C7 ON T1(C7);
 
EXEC DBMS_STATS.GATHER_TABLE_STATS(OWNNAME=>USER,TABNAME=>'T1',CASCADE=>TRUE,ESTIMATE_PERCENT=>NULL)
 
CREATE TABLE T2(
  C1 NUMBER,
  C2 NUMBER,
  C3 NUMBER,
  C4 DATE,
  C5 DATE,
  C6 DATE,
  C7 VARCHAR2(20),
  C8 VARCHAR2(20),
  C9 VARCHAR2(20),
  PRIMARY KEY(C1));
 
INSERT INTO
  T2
SELECT
  ROWNUM,
  DECODE(MOD(ROWNUM,5),0,NULL,ROWNUM),
  DECODE(MOD(ROWNUM,5),0,NULL,ROWNUM),
  TRUNC(SYSDATE)+TRUNC(ROWNUM/100),
  DECODE(MOD(ROWNUM,5),0,NULL,TRUNC(SYSDATE)+TRUNC(ROWNUM/100)),
  DECODE(MOD(ROWNUM,5),0,NULL,TRUNC(SYSDATE)+TRUNC(ROWNUM/100)),
  RPAD(CHR(MOD(ROWNUM-1,26)+65)||
    CHR(MOD(ROWNUM,26)+65)||
    CHR(MOD(ROWNUM+1,26)+65)||
    CHR(MOD(ROWNUM+2,26)+65),20,'A'),
  DECODE(MOD(ROWNUM,5),0,NULL,
   RPAD(CHR(MOD(ROWNUM-1,26)+65)||
    CHR(MOD(ROWNUM,26)+65)||
    CHR(MOD(ROWNUM+1,26)+65)||
    CHR(MOD(ROWNUM+2,26)+65),20,'A')),
  DECODE(MOD(ROWNUM,5),0,NULL,
   RPAD(CHR(MOD(ROWNUM-1,26)+65)||
    CHR(MOD(ROWNUM,26)+65)||
    CHR(MOD(ROWNUM+1,26)+65)||
    CHR(MOD(ROWNUM+2,26)+65),20,'A'))
FROM
  DUAL
CONNECT BY
  LEVEL<=1000000;
 
COMMIT;
 
CREATE INDEX IND_T2_C2 ON T2(C2);
CREATE INDEX IND_T2_C7 ON T2(C7);
 
EXEC DBMS_STATS.GATHER_TABLE_STATS(OWNNAME=>USER,TABNAME=>'T2',CASCADE=>TRUE,ESTIMATE_PERCENT=>NULL)

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s




Follow

Get every new post delivered to your Inbox.

Join 148 other followers

%d bloggers like this: