Lecture 16: Data Storage and Indexes Introduction to Database Systems CSE 444 Version(Feb(11,(2011(2(Flowers bar via Law SchoolWhere we stand How(to(use(a(DBMS(as(a:( Data(analyst:(SQL,(SQL,(SQL,(… (( ApplicaAon(programmer:(JDBC( Database(admin:(tuning,(triggers,(security( MassiveHscale(data(analyst:(Pig/MapReduce( How(DBMSs(work:( TransacAons( Data(storage(and(indexing( Query(execuAon(( Databases(as(a(service(3(hQp://www.cs.washington.edu/educaAon/courses/cse444/11wi/(((today(xOutline Storage(model( Index(structures((SecAon(14.1)( [Old(ediAon:(13.1(and(13.2]( BHtrees((SecAon(14.2)( [Old(ediAon:(13. 3](4(hQp://www.cs.washington.edu/educaAon/courses/cse444/11wi/(((Memory hierarchy Source:("Long(Term(Storage(Trends(and(You",(Jim(Gray,(2006:(hQp://research.microso^.com/enHus/um/people/gray/talks/(Outline Storage(model( Index(structures((SecAon(14.1)( [Old(ediAon:(13.1(and(13.2]( BHtrees((SecAon(14.2)( [Old(ediAon:(13. 3](6(hQp://www.cs.washington.edu/educaAon/courses/cse444/11wi/(((High-level overview: Indexes 7(hQp://www.cs.washington.edu/educaAon/courses/cse444/11wi/(((id( age( salary( other(006( 19( 50k( ...(005( 20( 55k( ...(004( 25( 50k( ...(007( 30( 80k( ...(002( 35( 75k( ...(003( 35( 70k( ...(001( 40( 65k( ...(id( age( salary( other(006( 19( 50k( ...(004( 25( 50k( ...(005( 20( 55k( ...(001( 40( 65k( ...(003( 35( 70k( ...(002( 35( 75k( ...(007( 30( 80k( ...(data(file(=(index(file(clustered((primary)(index(index(file(unclustered((secondary)(index(xDatabase File Types The(data(file(can(be(one(of:( Heap(file( Set(of(records,(parAAoned(into(blocks( Unsorted( SequenAal(file( Sorted(according(to(some(aQribute(s)(called((sort)(key(8(hQp://www.cs.washington.edu/educaAon/courses/cse444/11wi/(((different(from("primary(key"!(xIndex A((possibly(separate)(file,(that(allows(fast(access(to(records(in(the(data(file(given(a(search(key( The(index(contains((key,(value)(pairs:( The(key(=(an(aQribute(value( The(value(=(either(a(pointer(to(the(record,(or(the(record(itself(9(hQp://www.cs.washington.edu/educaAon/courses/cse444/11wi/(((x again(different(from("primary(key"!(Index Classification Clustered/unclustered( Clustered(=(records(close(in(index(are(close(in(data( Unclustered(=(records(close(in(index(may(be(far(in(data( Primary/secondary( Meaning(1:( Primary(=(is(over(aQributes(that(include(the(primary(key( Secondary(=(otherwise( Meaning(2:(means(the(same(as(clustered/unclustered( OrganizaAon:(B+(tree(or(Hash(table(10(hQp://www.cs.washington.edu/educaAon/courses/cse444/11wi/((((Cow(book)((Stanford(book)(xClustered/Unclustered Clustered( Index(determines(the(locaAon(of(indexed(records( Typically,(clustered(index(is(one(where(values(are(data(records((but(not(necessary)( Unclustered( Index(cannot(reorder(data,(does(not(determine(data(locaAon( In(these(indexes:(value(=(pointer(to(data(record(11(hQp://www.cs.washington.edu/educaAon/courses/cse444/11wi/(((Clustered Index File(is(sorted(on(the(index(aQribute( Only(one(per(table(12(hQp://www.cs.washington.edu/educaAon/courses/cse444/11wi/(((10 20 30 40 50 60 70 80 10 20 30 40 50 60 70 80 Index File Data FileUnclustered Index Several(per(table(13(hQp://www.cs.washington.edu/educaAon/courses/cse444/11wi/(((10 10 20 20 20 30 30 30 20 30 30 20 10 20 10 30 Index File Data File xClustered vs. Unclustered Index 14(hQp://www.cs.washington.edu/educaAon/courses/cse444/11wi/(((Data$entries$(Index$File)$(Data$file)$Data$Records$Data$entries$Data$Records$CLUSTERED$ UNCLUSTERED$B+(Tree(B+(Tree(More(commonly,(in(a(clustered(B+(Tree(index,((data$entries$are$data$ r ecords$Hash-Based Index Example 15(hQp://www.cs.washington.edu/educaAon/courses/cse444/11wi/(((10 21 20 20 30 18 40 19 50 22 60 18 70 21 80 19 H1(h1(sid)(=(00(h1(sid)(=(11(sid(Example(hashHbased(index(on(sid((student(id)(This(is(a(primary$index((because(it(determines(the((order(of(indexed(records((In(this(case,(data(entries(in(the(index(are(actual(data(records(There(is(no(separate(data(file((This(index(is(also(clustered$Index(File(Hash(funcAon(h1(Hash-Based Index Example 2 16(hQp://www.cs.washington.edu/educaAon/courses/cse444/11wi/(((18 18 20 22 19 21 21 19 10 21 20 20 30 18 40 19 50 22 60 18 70 21 80 19 H2(age(h2(age)(=(00(h2(age)(=(01(Secondary$index(Data(entries(in(index(are((key,RID)(pairs((Unclustered$index(Data(File(Index(File(Hash-Based Index 17(hQp://www.cs.washington.edu/educaAon/courses/cse444/11wi/(((18 18 20 22 19 21 21 19 10 21 20 20 30 18 40 19 50 22 60 18 70 21 80 19 H1(h1(sid)(=(00(h1(sid)(=(11(sid(H2(age(h2(age)(=(00(h2(age)(=(01(Another(example(of((clustered/primary(index(Another(example(of(unclustered/secondary(index(Good(for(point(queries(but(not(range(queries(Outline Storage(model( Index(structures((SecAon(14.1)( [Old(ediAon:(13.1(and(13.2]( BHtrees((SecAon(14.2)( [Old(ediAon:(13. 3](18(hQp://www.cs.washington.edu/educaAon/courses/cse444/11wi/(((B+ Trees Search(trees(( Idea(in(B(Trees( Make(1(node(=(1(block( Keep(tree(balanced(in(height( Idea(in(B+(Trees( Make(leaves(into(a(linked(list:(facilitates(range(queries(19(hQp://www.cs.washington.edu/educaAon/courses/cse444/11wi/(((B+ Trees Basics Parameter(d(=(the(degree( Each(interior(node(has(d$≤($m$≤($2d$keys((except(root)( Each(leaf(node(has(d$≤($m$≤($2d$keys(20(hQp://www.cs.washington.edu/educaAon/courses/cse444/11wi/(((30 120 240 Keys(k(<(30( Keys(30≤(k<120( Keys(120≤(k<240( Keys(240≤(k(40 50 60 40( 50( 60(Next(leaf(Each(node(also(has(m+1$pointers(Data(records(B+ Tree Example 21(hQp://www.cs.washington.edu/educaAon/courses/cse444/11wi/(((80 20 60 100 120 140 10 15 18 20 30 40 50 60 65 80 85 90 10( 15( 18( 20( 30( 40( 50( 60( 65( 80( 85( 90(d(=(2(Find(the(key(40(40(40(≤(80(20(<(40(≤(60(30(<(40(≤(40(B+ Tree Design How(large(d(?( Example:( Key(size(=(4(bytes( Pointer(size(=(8(bytes( Block(size(=(4096(bytes( 2d(x(4((+((2d+1)(x(8((<=((4096( d(=(170(22(hQp://www.cs.washington.edu/educaAon/courses/cse444/11wi/(((80 2d(keys(Searching a B+ Tree Exact(key(values:( Start(at(the(root( Proceed(down,(to(the(leaf( Range(queries:(
View Full Document