CS#59000#Sta)s)cal#Machine#learning#Lecture#22#Yuan#(Alan)#Qi#Outline ((• Review(of(D/separa4on(• Markov(random(fields,(Markov(blankets(• Inference(on(chains(• Factor(graphs(• The(sum/product(algorithm((belief(prop aga4on)(D/separa4on(• A,(B,(and(C(are(non/intersec4ng(subsets(of(nodes(in(a(directed(graph.(• A(path(from(A(to(B(is(blocked(if(it(contains(a(node(such(that(either(a) the(arrows(on(the(path(meet(either(head/to/tail(or(tail/to/tail (at(the(node,(and(the(node(is(in(the(set(C,(or(b) the(arrows(meet(head/to/head(at(the(node,(and(neither(the(node,(nor(any(of(its(descendants,(are(in(the(set(C.(• If(all(paths(from(A(to(B(are(blocked,(A(is(said(to(be(d/separated(from(B(by(C.((• If(A(is(d/separated(from(B(by(C,(the(joint(distribu4on(over(all(variables(in(the(graph(sa4sfies(((((((((((((((((((((((.(D/separa4on:(Example(D/separa4on:(I.I.D.(Data(Bayesian(Curve(FiMng(Revisited(D/separa4on(implies(that(informa4on(from(training(data(i s(summarized(in(w.((The(Markov(Blanket(Factors(independent(of(xi(cancel(between(numerator(and(denominator.(Markov(Random(Fields(Markov(Blanket(Cliques(and(Maximal(Cliques(Clique(Maximal(Clique(Joint(Distribu4on(where(((((((((((((((((((is(the(poten4al(over(clique(C(and((is(the(normaliza4on(coefficient;(note:(M(K/state(variables(→(KM(terms(in(Z.(Energies(and(the(Boltzmann(distribu4on(Illustra4 on:(Image(De/Noising((1)(Original(Image(Noisy(Image(Illustra4 on:(Image(De/Noising((2)(Illustra4 on:(Image(De/Noising((3)(Noisy(Image( Restored(Image((ICM)(Conver4ng(Directed(to(Undirected(Graphs((1)(Conver4ng(Directed(to(Undirected(Graphs((2)(Addi4onal(links:(“marrying(parents”,(i.e.,(moraliza4on(Directed(vs.(Undirected(Graphs((Inference(in(Graphical(Models(Inference(on(a(Chain(Computational time increases exponentially with N.Inference(on(a(Chain(Inference(on(a(Chain(Inference(on(a(Chain(Inference(on(a(Chain(To(compute(local(marginals:(• Compute(and(store(all(forward(messages,(((((((((((((.(• Compute(and(store(all(backward(messages,(((((((((((((.((• Compute(Z(at(any(node(xm((• Compute(for(all(variables(required.(Computational time increases exponentially with N.Trees(Undirected Tree Directed Tree PolytreeFactor(Graphs(Factor(Graphs(from(Directed( Graphs(Factor(Graphs(from(Undirected(Graphs(The(Sum/Product(Algorithm((1)(Objec4ve:(i. to(obtain(an(efficient,(exact(inference(algorithm(for(finding(marginals;(ii. in(situa4ons(where(several(marginals(are(required,(to(allow(computa4ons(to(be(shared(efficiently.(Key(idea:(Distribu4ve(Law(The(Sum/Product(Algorithm((2)(ne(x): factor nodes that are neighbors of x Xs: variables in the subtree connected to the variable node x via the factor node fsThe(Sum/Product(Algorithm((3)(Why(this(is(true?(Message(from(factor(node(to(variable(node((The(Sum/Product(Algorithm((3)(p(x)=�X�s∈ne(x)Fs(x, Xs) (1)=�XaFa(x, Xa)�X\a�s∈ne(x)&s�=aFs(x, Xs) (2)=�s∈ne(x)�XsFs(x, Xs) (3)The(Sum/Product(Algorithm((4)(Message(from(variable((node(to(factor(node((The(Sum/Product(Algorithm((5)(The(Sum/Product(Algorithm((6)(How to compute the message from a variable node to a factor node efficiently? Do the same thing just as when we compute p(x), but with a small difference.The(Sum/Product(Algorithm((7)(Ini4aliza4on(The(Sum/Product(Algorithm((8)(To(compute(local(marginals:(• Pick(an(arbitrary(node(as(root(• Compute(and(propagate(messages(from(the(leaf(nodes(to(the(root,(storing(received(messages(at(every(node.(• Compute(and(propagate(messages(from(the(root(to(the(leaf(nodes,(storing(received(messages(at(every(node.(• Compute(the(product(of(received(messages(at(each(node(for(which
View Full Document