This preview shows page 1-2-3-4-5-6-7-8-9-65-66-67-68-69-70-71-72-73-74-130-131-132-133-134-135-136-137-138 out of 138 pages.
Version(March(4,(2011(Lectures 21-23: Query Optimization Introduction to Database Systems CSE 444, Winter 2011 h2p://www.cs.washington.edu/educa<on/courses/cse444/11wi/((((Where we are / and where we go 2(Review Relational Algebra Supplier(sid,(sname,(scity,(sstate)(Supply(sid,(pno,(quan<ty)(SELECT sname FROM Supplier x, Supply y WHERE x.sid = y.sid and y.pno = 2 and x.scity = ʻSeattleʼ and x.sstate = ʻWAʼ Give(a(rela<onal(algebra(expression(for(this(query:(Supplier( Supply(sid(=(sid(σ(scity=ʻSea2leʼ∧(sstate=ʻWAʼ∧(pno=2(π sname(π sname(σ scity=ʻSea2leʼ∧(sstate=ʻWAʼ∧(pno=2((Supplier((((((sid(=(sid(Supply))(3(Key Idea: Algebraic Optimization N(=(((z*2)+((z*3)+y))/x((Given(x(=(1,(y(=(0,(and(z(=(4,(solve(for(N.(((In(what(order(did(you(perform(the(opera<ons?(And(how(many(opera<ons?(4(Key Idea: Algebraic Optimization N(=(((z*2)+((z*3)+0))/1((Given(x(=(1,(y(=(0,(and(z(=(4,(solve(for(N(again,((but(now(assume:((((*(costs(10(units((((+(costs(2(units((((/(costs(50(units((Which(execu<on(plan(offers(the(lowest(cost?((5(Key Idea: Algebraic Optimization N(=(((z*2)+((z*3)+0))/1((Algebraic(Laws:((1.((+)(iden<ty:((((( (x+0(=(x(2.((/)((iden<ty:(((((( (x/1(=(x(3.((*)(distributes:( ((n*x+n*y)(=(n*(x+y)(4.((*)(commutes:( (x*y(=(y*x((Apply(rules(1,(3,(4,(2:(N(=((2+3)*z((two(opera<ons(instead(of(five,(no(division(operator(6(Optimization with Relational Algebra Supplier(sid,(sname,(scity,(sstate)(Supply(sid,(pno,(quan<ty)(SELECT sname FROM Supplier x, Supply y WHERE x.sid = y.sid and y.pno = 2 and x.scity = ʻSeattleʼ and x.sstate = ʻWAʼ Here(is(a(different(rela<onal(algebra(expression(for(this(query:(π sname(σ scity=ʻSea2leʼ∧(sstate=ʻWAʼ∧(pno=2((Supplier((((((sid(=(sid(Supply))(π sname((σ scity=ʻSea2leʼ∧(sstate=ʻWAʼSupplier)((((((sid(=(sid(((σ pno=2(Supply))(Query(Op<miza<on(Goal:((For(a(query,(there(may(exist(many(logical(and(physical(query(plans.(Query(Op<mizer(needs(to(pick(a("good"(one.(7(Hands-on Example Supplier(sid,(sname,(scity,(sstate)(Supply(sid,(pno,(quan<ty)(SELECT sname FROM Supplier x, Supply y WHERE x.sid = y.sid and y.pno = 2 and x.scity = ʻSeattleʼ and x.sstate = ʻWAʼ Some(sta<s<cs( T(Supplier)(=(1000(records( T(Supply)(=(10,000(records( B(Supplier)(=(100(pages( B(Supply)(=(100(pages( V(Supplier,scity)(=(20( V(Supplier,state)(=(10( V(Supply,pno)(=(2,500( Both(rela<ons(are(clustered( M(=(10(8(Supplier(Supply(sid(=(sid(σ scity=ʻSea2leʼ∧(sstate=ʻWAʼ∧(pno=2(π sname((File(scan)((File(scan)((Blockbnested(loop)((On(the(fly)((On(the(fly)(B(Supplier)(=((100(B(Supply)(=(((((100(T(Supplier)(=((1,000(T(Supply)(=((10,000(V(Supplier,(scity)(=((((20(V(Supplier,(state)(=(((10(V(Supply,(pno)(=((2,500(M(=(10(Physical Query Plan 1 Supplier(sid,(sname,(scity,(sstate)(Supply(sid,(pno,(quan<ty)(2(3(1(=(B(Supplier)+B(Supplier)•B(Supply)/M(=(100(+(100•100/10(=(1,100((I/Os(1(2( 3(Selec<on(and(project(onbthebfly(b>(No(addi<onal(cost.(Cost(=(1,100((I/Os(9(Supplier(Supply(sid(=(sid(σscity=ʻSea2leʼ∧(sstate=ʻWAʼ(π sname((File(scan)((File(scan)((Sortbmerge(join)((Scan(write(to(T2)((On(the(fly)(σpno=2(Physical Query Plan 2 =(100(+(100•1/20•1/10(=(100.5(≈(101(1((Scan((write(to(T1)(1( 2(3(=(100(+(100•1/2500(≈(101(2(=(B(T1)(+(B(T2)(=(1(+(1(=(2(3(Cost(≈((204((I/Os(B(Supplier)(=((100(B(Supply)(=(((((100(T(Supplier)(=((1,000(T(Supply)(=((10,000(V(Supplier,(scity)(=((((20(V(Supplier,(state)(=(((10(V(Supply,(pno)(=((2,500(M(=(10(Supplier(sid,(sname,(scity,(sstate)(Supply(sid,(pno,(quan<ty)(1(page( 1(page(x Independence(assump<on(10(Supply( Supplier(sid(=(sid(σ scity=ʻSea2leʼ∧sstate=ʻWAʼ(π sname((Index(nested(loop)((Clustered(Index(lookup(on(sid)((On(the(fly)(σ(pno=2((Clustered(Index(lookup(on(pno)((Use(index)((On(the(fly)(4(tuples(B(Supplier)(=((100(B(Supply)(=(((((100(T(Supplier)(=((1,000(T(Supply)(=((10,000(V(Supplier,(scity)(=((((20(V(Supplier,(state)(=(((10(V(Supply,(pno)(=((2,500(M(=(10(Supplier(sid,(sname,(scity,(sstate)(Supply(sid,(pno,(quan<ty)(Physical Query Plan 3 1(2(=(B(Supply)/V(Supply,pno)(≈(1(1(=(4(2(each(of(the(4(tuples(will(have(a(different(sid(Cost(≈(((5((I/Os(x 11(Simplifications In(the(previous(examples,(we(assumed(that(all(index(pages(were(in(memory( When(this(is(not(the(case,(we(need(to(add(the(cost(of(fetching(index(pages(from(disk(12(Query Optimization Goal / Algorithm Query(Op<miza<on(Goal( For(a(query,(there(exist(many(logical(and(physical(plans.(Query(op<mizer(needs(to(pick(a(good(one.(How?( Query(Op<miza<on(Algorithm( Enumerate(alterna<ve(plans( Compute(es<mated(cost(of(each(plan( Compute(both(number(of(I/Os,(and(CPU(cost( Choose(plan(with(lowest(cost( This(is(called(costbbased(op<miza<on(13(Lessons Need(to(consider(several(physical(plan( even(for(one,(simple(logical(plan( No(magic(“best”(plan:(depends(on(the(data( In(order(to(make(the(right(choice( need(to(have(sta*s*cs(over(the(data( the(Bʼs,(the(Tʼs,(the(Vʼs(14(Outline Search0space0 Algorithm(for(enumera<ng(query(plans( Es<ma<ng(the(cost(of(a(query(plan(15(Relational Algebra Equivalences Selec<ons( Commuta<ve:(σc1(σc2(R))(same(as(σc2(σc1(R))( Cascading:((σc1∧c2(R)(same(as(σc2(σc1(R))( Projec<ons( projec<ons(can(be(added(as(long(as(all(a2ributes(are(kept(that(are(used(in(later(operators(or(the(results( Joins( Commuta<ve(:(R(⋈(S(same(as(S(⋈(R(( Associa<ve:(R(⋈((S(⋈(T)(same(as((R(⋈(S)(⋈(T((16(Left-Deep Plans and Bushy Plans R1( R2( R3( R4(R1(R2(R3(R4(Lembdeep(plan(Bushy(plan(x 4(rela<ons:(•(#(different(tree(shapes(=(5(•(#(different(orders(=(4!(=(24(•(#(different(join(trees(=(5(*(24(=(120(17(Commutativity, Associativity, Distributivity R(∪(S(=(S(∪(R,((R(∪((S(∪(T)(=((R(∪(S)(∪(T(R(⨝(S(=(S(⨝(R,((R(⨝((S(⨝(T)(=((R(⨝(S)(⨝(T(R(⨝((S(∪(T)((=(((R(⨝(S)(∪((R(⨝(T)(18(Example Assump<ons:( Every(join(selec<vity(is(10%( That(is:(T(R(⨝(S)(=(0.1(*(T(R)(*(T(S)((etc.( B(R)=100,(B(S)(=(50,(B(T)=500( All(joins(are(main(memory(joins( All(intermediate(results(are(materialized(Which(plan(is(more(efficient:(R(⨝((S(⨝(T)((or(((R(⨝(S)(⨝(T(?(Note:(some<mes(defined(differently!(19(Laws involving selection: σ C AND Cʼ(R) = σ C(σ Cʼ(R)) = σ C(R) ∩ σ Cʼ(R) σ C OR Cʼ(R) = σ C(R)
View Full Document