import
math
import
random
from
functools
import
lru_cache
# Training data: maps an input pair (x0, x1) to a target pair (t0, t1).
# t0 is x0 XOR x1 and t1 is x0 AND x1, which yields the table
#   (0, 0) -> (0, 0)
#   (0, 1) -> (1, 0)
#   (1, 0) -> (1, 0)
#   (1, 1) -> (0, 1)
# The nested iteration keeps the rows in exactly this insertion order.
trainingset = {
    (x0, x1): (x0 ^ x1, x0 & x1)
    for x0 in (0, 1)
    for x1 in (0, 1)
}
def grad_desc(cost, gradients, initial_values, step_size, threshold,
              max_iterations=None):
    """Minimise ``cost`` by plain gradient descent.

    Every step updates all values simultaneously: each value is moved
    against its own partial derivative, and every derivative is evaluated
    at the values of the previous step.

    Args:
        cost: Callable taking all values as positional arguments and
            returning the cost to minimise.
        gradients: One callable per value, in the same order as the
            values; each takes all values as positional arguments and
            returns the partial derivative for its value.
        initial_values: Sequence of starting values.
        step_size: Factor applied to each partial derivative per step.
        threshold: Descent stops as soon as the cost of the freshly
            updated values is strictly below this number.
        max_iterations: Optional upper bound on the number of update
            steps. ``None`` (the default) keeps the original unbounded
            behaviour.

    Returns:
        A list with the first set of updated values whose cost is below
        ``threshold``. At least one update step is always performed.

    Raises:
        RuntimeError: If ``max_iterations`` is given and the threshold is
            not reached within that many steps.
    """
    old_values = initial_values
    steps_done = 0
    # Without a bound, a diverging or NaN cost would spin forever because
    # ``cost < threshold`` can never become true.
    while max_iterations is None or steps_done < max_iterations:
        new_values = [
            value - step_size * gradient(*old_values)
            for value, gradient in zip(old_values, gradients)
        ]
        if cost(*new_values) < threshold:
            return new_values
        old_values = new_values
        steps_done += 1
    raise RuntimeError(
        'grad_desc did not reach the threshold within %d iterations'
        % max_iterations
    )
def a(z):
    """Return the logistic sigmoid ``1 / (1 + e**-z)`` of ``z``.

    For very negative ``z`` the term ``math.exp(-z)`` exceeds the float
    range and raises ``OverflowError``; the sigmoid's limit there is 0, so
    0.0 is returned instead of letting the exception propagate. All other
    inputs produce exactly the same result as the plain formula.
    """
    try:
        return 1 / (1 + math.exp(-z))
    except OverflowError:
        # exp(-z) overflowed, i.e. z is below roughly -709.78.
        return 0.0
@lru_cache(maxsize=4)
def ns(x0, x1,
       w100, w101, w110, w111, b10, b11,
       w200, w201, w210, w211, b20, b21,
       w300, w301, w310, w311, b30, b31):
    """Run the forward pass and return the activations of every layer.

    The network has two inputs followed by three layers of two neurons
    each. A weight ``wLij`` connects neuron ``i`` of layer ``L - 1`` to
    neuron ``j`` of layer ``L``; ``bLj`` is the bias of neuron ``j`` in
    layer ``L``. Every neuron applies the activation function ``a``.

    Results are memoised with a four-entry LRU cache, which equals the
    number of rows in ``trainingset``.

    Returns:
        ``((n00, n01), (n10, n11), (n20, n21), (n30, n31))`` - the input
        pair followed by the activations of layers 1, 2 and 3.
    """
    # Layer 0 is just the input pair.
    n00, n01 = x0, x1

    # Layer 1.
    n10 = a(n00 * w100 + n01 * w110 + b10)
    n11 = a(n00 * w101 + n01 * w111 + b11)

    # Layer 2.
    n20 = a(n10 * w200 + n11 * w210 + b20)
    n21 = a(n10 * w201 + n11 * w211 + b21)

    # Layer 3 (output).
    n30 = a(n20 * w300 + n21 * w310 + b30)
    n31 = a(n20 * w301 + n21 * w311 + b31)

    return ((n00, n01), (n10, n11), (n20, n21), (n30, n31))
def out(x0, x1,
        w100, w101, w110, w111, b10, b11,
        w200, w201, w210, w211, b20, b21,
        w300, w301, w310, w311, b30, b31):
    """Return only the last layer's activations computed by ``ns``.

    Takes the same arguments as ``ns`` and forwards them unchanged.
    """
    activations = ns(x0, x1,
                     w100, w101, w110, w111, b10, b11,
                     w200, w201, w210, w211, b20, b21,
                     w300, w301, w310, w311, b30, b31)
    return activations[-1]
def cost(w100, w101, w110, w111, b10, b11,
         w200, w201, w210, w211, b20, b21,
         w300, w301, w310, w311, b30, b31):
    """Return the halved mean squared error over ``trainingset``.

    For each training row the squared differences between both network
    outputs and their targets are added up; the total is divided by twice
    the number of rows.
    """
    params = (w100, w101, w110, w111, b10, b11,
              w200, w201, w210, w211, b20, b21,
              w300, w301, w310, w311, b30, b31)
    squared_error = 0
    for inputs, (t0, t1) in trainingset.items():
        y0, y1 = out(*inputs, *params)
        squared_error += (y0 - t0) ** 2 + (y1 - t1) ** 2
    return squared_error / (2 * len(trainingset))
def dCdw300(w100, w101, w110, w111, b10, b11,
            w200, w201, w210, w211, b20, b21,
            w300, w301, w310, w311, b30, b31):
    """Return dC/dw300, averaged over ``trainingset``.

    ``w300`` is the weight from n20 to output n30, so only the first
    output's error contributes.
    """
    params = (w100, w101, w110, w111, b10, b11,
              w200, w201, w210, w211, b20, b21,
              w300, w301, w310, w311, b30, b31)
    total = 0
    for inputs, (t0, _) in trainingset.items():
        _, _, (n20, _), (n30, _) = ns(*inputs, *params)
        # error * sigmoid slope at n30 * activation feeding the weight
        total += (n30 - t0) * n30 * (1 - n30) * n20
    return total / len(trainingset)
def dCdw310(w100, w101, w110, w111, b10, b11,
            w200, w201, w210, w211, b20, b21,
            w300, w301, w310, w311, b30, b31):
    """Return dC/dw310, averaged over ``trainingset``.

    ``w310`` is the weight from n21 to output n30, so only the first
    output's error contributes.
    """
    params = (w100, w101, w110, w111, b10, b11,
              w200, w201, w210, w211, b20, b21,
              w300, w301, w310, w311, b30, b31)
    total = 0
    for inputs, (t0, _) in trainingset.items():
        _, _, (_, n21), (n30, _) = ns(*inputs, *params)
        # error * sigmoid slope at n30 * activation feeding the weight
        total += (n30 - t0) * n30 * (1 - n30) * n21
    return total / len(trainingset)
def dCdw301(w100, w101, w110, w111, b10, b11,
            w200, w201, w210, w211, b20, b21,
            w300, w301, w310, w311, b30, b31):
    """Return dC/dw301, averaged over ``trainingset``.

    ``w301`` is the weight from n20 to output n31, so only the second
    output's error contributes.
    """
    params = (w100, w101, w110, w111, b10, b11,
              w200, w201, w210, w211, b20, b21,
              w300, w301, w310, w311, b30, b31)
    total = 0
    for inputs, (_, t1) in trainingset.items():
        _, _, (n20, _), (_, n31) = ns(*inputs, *params)
        # error * sigmoid slope at n31 * activation feeding the weight
        total += (n31 - t1) * n31 * (1 - n31) * n20
    return total / len(trainingset)
def dCdw311(w100, w101, w110, w111, b10, b11,
            w200, w201, w210, w211, b20, b21,
            w300, w301, w310, w311, b30, b31):
    """Return dC/dw311, averaged over ``trainingset``.

    ``w311`` is the weight from n21 to output n31, so only the second
    output's error contributes.
    """
    params = (w100, w101, w110, w111, b10, b11,
              w200, w201, w210, w211, b20, b21,
              w300, w301, w310, w311, b30, b31)
    total = 0
    for inputs, (_, t1) in trainingset.items():
        _, _, (_, n21), (_, n31) = ns(*inputs, *params)
        # error * sigmoid slope at n31 * activation feeding the weight
        total += (n31 - t1) * n31 * (1 - n31) * n21
    return total / len(trainingset)
def dCdb30(w100, w101, w110, w111, b10, b11,
           w200, w201, w210, w211, b20, b21,
           w300, w301, w310, w311, b30, b31):
    """Return dC/db30, averaged over ``trainingset``.

    ``b30`` is the bias of output n30, so only the first output's error
    contributes.
    """
    params = (w100, w101, w110, w111, b10, b11,
              w200, w201, w210, w211, b20, b21,
              w300, w301, w310, w311, b30, b31)
    total = 0
    for inputs, (t0, _) in trainingset.items():
        _, _, _, (n30, _) = ns(*inputs, *params)
        # error * sigmoid slope at n30
        total += (n30 - t0) * n30 * (1 - n30)
    return total / len(trainingset)
def dCdb31(w100, w101, w110, w111, b10, b11,
           w200, w201, w210, w211, b20, b21,
           w300, w301, w310, w311, b30, b31):
    """Return dC/db31, averaged over ``trainingset``.

    ``b31`` is the bias of output n31, so only the second output's error
    contributes.
    """
    params = (w100, w101, w110, w111, b10, b11,
              w200, w201, w210, w211, b20, b21,
              w300, w301, w310, w311, b30, b31)
    total = 0
    for inputs, (_, t1) in trainingset.items():
        _, _, _, (_, n31) = ns(*inputs, *params)
        # error * sigmoid slope at n31
        total += (n31 - t1) * n31 * (1 - n31)
    return total / len(trainingset)
def dCdw200(w100, w101, w110, w111, b10, b11,
            w200, w201, w210, w211, b20, b21,
            w300, w301, w310, w311, b30, b31):
    """Return dC/dw200, averaged over ``trainingset``.

    ``w200`` is the weight from n10 to n20. n20 feeds both outputs (via
    w300 and w301), so both output errors contribute.
    """
    params = (w100, w101, w110, w111, b10, b11,
              w200, w201, w210, w211, b20, b21,
              w300, w301, w310, w311, b30, b31)
    total = 0
    for inputs, (t0, t1) in trainingset.items():
        _, (n10, _), (n20, _), (n30, n31) = ns(*inputs, *params)
        slope30 = n30 * (1 - n30)
        slope31 = n31 * (1 - n31)
        # Back-propagated factor from each output down to w200.
        path30 = w300 * n20 * (1 - n20) * n10
        path31 = w301 * n20 * (1 - n20) * n10
        total += (n30 - t0) * (slope30 * path30)
        total += (n31 - t1) * (slope31 * path31)
    return total / len(trainingset)
def dCdw210(w100, w101, w110, w111, b10, b11,
            w200, w201, w210, w211, b20, b21,
            w300, w301, w310, w311, b30, b31):
    """Return dC/dw210, averaged over ``trainingset``.

    ``w210`` is the weight from n11 to n20. n20 feeds both outputs (via
    w300 and w301), so both output errors contribute.
    """
    params = (w100, w101, w110, w111, b10, b11,
              w200, w201, w210, w211, b20, b21,
              w300, w301, w310, w311, b30, b31)
    total = 0
    for inputs, (t0, t1) in trainingset.items():
        _, (_, n11), (n20, _), (n30, n31) = ns(*inputs, *params)
        slope30 = n30 * (1 - n30)
        slope31 = n31 * (1 - n31)
        # Back-propagated factor from each output down to w210.
        path30 = w300 * n20 * (1 - n20) * n11
        path31 = w301 * n20 * (1 - n20) * n11
        total += (n30 - t0) * (slope30 * path30)
        total += (n31 - t1) * (slope31 * path31)
    return total / len(trainingset)
def dCdw201(w100, w101, w110, w111, b10, b11,
            w200, w201, w210, w211, b20, b21,
            w300, w301, w310, w311, b30, b31):
    """Return dC/dw201, averaged over ``trainingset``.

    ``w201`` is the weight from n10 to n21. n21 feeds both outputs (via
    w310 and w311), so both output errors contribute.
    """
    params = (w100, w101, w110, w111, b10, b11,
              w200, w201, w210, w211, b20, b21,
              w300, w301, w310, w311, b30, b31)
    total = 0
    for inputs, (t0, t1) in trainingset.items():
        _, (n10, _), (_, n21), (n30, n31) = ns(*inputs, *params)
        slope30 = n30 * (1 - n30)
        slope31 = n31 * (1 - n31)
        # Back-propagated factor from each output down to w201.
        path30 = w310 * n21 * (1 - n21) * n10
        path31 = w311 * n21 * (1 - n21) * n10
        total += (n30 - t0) * (slope30 * path30)
        total += (n31 - t1) * (slope31 * path31)
    return total / len(trainingset)
def dCdw211(w100, w101, w110, w111, b10, b11,
            w200, w201, w210, w211, b20, b21,
            w300, w301, w310, w311, b30, b31):
    """Return dC/dw211, averaged over ``trainingset``.

    ``w211`` is the weight from n11 to n21. n21 feeds both outputs (via
    w310 and w311), so both output errors contribute.
    """
    params = (w100, w101, w110, w111, b10, b11,
              w200, w201, w210, w211, b20, b21,
              w300, w301, w310, w311, b30, b31)
    total = 0
    for inputs, (t0, t1) in trainingset.items():
        _, (_, n11), (_, n21), (n30, n31) = ns(*inputs, *params)
        slope30 = n30 * (1 - n30)
        slope31 = n31 * (1 - n31)
        # Back-propagated factor from each output down to w211.
        path30 = w310 * n21 * (1 - n21) * n11
        path31 = w311 * n21 * (1 - n21) * n11
        total += (n30 - t0) * (slope30 * path30)
        total += (n31 - t1) * (slope31 * path31)
    return total / len(trainingset)
def dCdb20(w100, w101, w110, w111, b10, b11,
           w200, w201, w210, w211, b20, b21,
           w300, w301, w310, w311, b30, b31):
    """Return dC/db20, averaged over ``trainingset``.

    ``b20`` is the bias of n20. n20 feeds both outputs (via w300 and
    w301), so both output errors contribute.
    """
    params = (w100, w101, w110, w111, b10, b11,
              w200, w201, w210, w211, b20, b21,
              w300, w301, w310, w311, b30, b31)
    total = 0
    for inputs, (t0, t1) in trainingset.items():
        _, _, (n20, _), (n30, n31) = ns(*inputs, *params)
        slope30 = n30 * (1 - n30)
        slope31 = n31 * (1 - n31)
        # Back-propagated factor from each output down to b20.
        path30 = w300 * n20 * (1 - n20)
        path31 = w301 * n20 * (1 - n20)
        total += (n30 - t0) * (slope30 * path30)
        total += (n31 - t1) * (slope31 * path31)
    return total / len(trainingset)
def dCdb21(w100, w101, w110, w111, b10, b11,
           w200, w201, w210, w211, b20, b21,
           w300, w301, w310, w311, b30, b31):
    """Return dC/db21, averaged over ``trainingset``.

    ``b21`` is the bias of n21. n21 feeds both outputs (via w310 and
    w311), so both output errors contribute.
    """
    params = (w100, w101, w110, w111, b10, b11,
              w200, w201, w210, w211, b20, b21,
              w300, w301, w310, w311, b30, b31)
    total = 0
    for inputs, (t0, t1) in trainingset.items():
        _, _, (_, n21), (n30, n31) = ns(*inputs, *params)
        slope30 = n30 * (1 - n30)
        slope31 = n31 * (1 - n31)
        # Back-propagated factor from each output down to b21.
        path30 = w310 * n21 * (1 - n21)
        path31 = w311 * n21 * (1 - n21)
        total += (n30 - t0) * (slope30 * path30)
        total += (n31 - t1) * (slope31 * path31)
    return total / len(trainingset)
def dCdw100(w100, w101, w110, w111, b10, b11,
            w200, w201, w210, w211, b20, b21,
            w300, w301, w310, w311, b30, b31):
    """Return dC/dw100, averaged over ``trainingset``.

    ``w100`` is the weight from input n00 to n10. n10 reaches each output
    along two routes (through n20 and through n21), so each output error
    is weighted by the sum of both routes.
    """
    params = (w100, w101, w110, w111, b10, b11,
              w200, w201, w210, w211, b20, b21,
              w300, w301, w310, w311, b30, b31)
    total = 0
    for inputs, (t0, t1) in trainingset.items():
        (n00, _), (n10, _), (n20, n21), (n30, n31) = ns(*inputs, *params)
        # Route through n20 plus route through n21, per output.
        paths30 = (w300 * n20 * (1 - n20) * w200 * n10 * (1 - n10) * n00
                   + w310 * n21 * (1 - n21) * w201 * n10 * (1 - n10) * n00)
        paths31 = (w301 * n20 * (1 - n20) * w200 * n10 * (1 - n10) * n00
                   + w311 * n21 * (1 - n21) * w201 * n10 * (1 - n10) * n00)
        total += (n30 - t0) * (n30 * (1 - n30) * paths30)
        total += (n31 - t1) * (n31 * (1 - n31) * paths31)
    return total / len(trainingset)
def dCdw110(w100, w101, w110, w111, b10, b11,
            w200, w201, w210, w211, b20, b21,
            w300, w301, w310, w311, b30, b31):
    """Return dC/dw110, averaged over ``trainingset``.

    ``w110`` is the weight from input n01 to n10. n10 reaches each output
    along two routes (through n20 and through n21), so each output error
    is weighted by the sum of both routes.
    """
    params = (w100, w101, w110, w111, b10, b11,
              w200, w201, w210, w211, b20, b21,
              w300, w301, w310, w311, b30, b31)
    total = 0
    for inputs, (t0, t1) in trainingset.items():
        (_, n01), (n10, _), (n20, n21), (n30, n31) = ns(*inputs, *params)
        # Route through n20 plus route through n21, per output.
        paths30 = (w300 * n20 * (1 - n20) * w200 * n10 * (1 - n10) * n01
                   + w310 * n21 * (1 - n21) * w201 * n10 * (1 - n10) * n01)
        paths31 = (w301 * n20 * (1 - n20) * w200 * n10 * (1 - n10) * n01
                   + w311 * n21 * (1 - n21) * w201 * n10 * (1 - n10) * n01)
        total += (n30 - t0) * (n30 * (1 - n30) * paths30)
        total += (n31 - t1) * (n31 * (1 - n31) * paths31)
    return total / len(trainingset)
def dCdw101(w100, w101, w110, w111, b10, b11,
            w200, w201, w210, w211, b20, b21,
            w300, w301, w310, w311, b30, b31):
    """Return dC/dw101, averaged over ``trainingset``.

    ``w101`` is the weight from input n00 to n11. n11 reaches each output
    along two routes (through n20 and through n21), so each output error
    is weighted by the sum of both routes.
    """
    params = (w100, w101, w110, w111, b10, b11,
              w200, w201, w210, w211, b20, b21,
              w300, w301, w310, w311, b30, b31)
    total = 0
    for inputs, (t0, t1) in trainingset.items():
        (n00, _), (_, n11), (n20, n21), (n30, n31) = ns(*inputs, *params)
        # Route through n20 plus route through n21, per output.
        paths30 = (w300 * n20 * (1 - n20) * w210 * n11 * (1 - n11) * n00
                   + w310 * n21 * (1 - n21) * w211 * n11 * (1 - n11) * n00)
        paths31 = (w301 * n20 * (1 - n20) * w210 * n11 * (1 - n11) * n00
                   + w311 * n21 * (1 - n21) * w211 * n11 * (1 - n11) * n00)
        total += (n30 - t0) * (n30 * (1 - n30) * paths30)
        total += (n31 - t1) * (n31 * (1 - n31) * paths31)
    return total / len(trainingset)
def dCdw111(w100, w101, w110, w111, b10, b11,
            w200, w201, w210, w211, b20, b21,
            w300, w301, w310, w311, b30, b31):
    """Return dC/dw111, averaged over ``trainingset``.

    ``w111`` is the weight from input n01 to n11. n11 reaches each output
    along two routes (through n20 and through n21), so each output error
    is weighted by the sum of both routes.
    """
    params = (w100, w101, w110, w111, b10, b11,
              w200, w201, w210, w211, b20, b21,
              w300, w301, w310, w311, b30, b31)
    total = 0
    for inputs, (t0, t1) in trainingset.items():
        (_, n01), (_, n11), (n20, n21), (n30, n31) = ns(*inputs, *params)
        # Route through n20 plus route through n21, per output.
        paths30 = (w300 * n20 * (1 - n20) * w210 * n11 * (1 - n11) * n01
                   + w310 * n21 * (1 - n21) * w211 * n11 * (1 - n11) * n01)
        paths31 = (w301 * n20 * (1 - n20) * w210 * n11 * (1 - n11) * n01
                   + w311 * n21 * (1 - n21) * w211 * n11 * (1 - n11) * n01)
        total += (n30 - t0) * (n30 * (1 - n30) * paths30)
        total += (n31 - t1) * (n31 * (1 - n31) * paths31)
    return total / len(trainingset)
def dCdb10(w100, w101, w110, w111, b10, b11,
           w200, w201, w210, w211, b20, b21,
           w300, w301, w310, w311, b30, b31):
    """Return dC/db10, averaged over ``trainingset``.

    ``b10`` is the bias of n10. n10 reaches each output along two routes
    (through n20 and through n21), so each output error is weighted by
    the sum of both routes.
    """
    params = (w100, w101, w110, w111, b10, b11,
              w200, w201, w210, w211, b20, b21,
              w300, w301, w310, w311, b30, b31)
    total = 0
    for inputs, (t0, t1) in trainingset.items():
        _, (n10, _), (n20, n21), (n30, n31) = ns(*inputs, *params)
        # Route through n20 plus route through n21, per output.
        paths30 = (w300 * n20 * (1 - n20) * w200 * n10 * (1 - n10)
                   + w310 * n21 * (1 - n21) * w201 * n10 * (1 - n10))
        paths31 = (w301 * n20 * (1 - n20) * w200 * n10 * (1 - n10)
                   + w311 * n21 * (1 - n21) * w201 * n10 * (1 - n10))
        total += (n30 - t0) * (n30 * (1 - n30) * paths30)
        total += (n31 - t1) * (n31 * (1 - n31) * paths31)
    return total / len(trainingset)
def dCdb11(w100, w101, w110, w111, b10, b11,
           w200, w201, w210, w211, b20, b21,
           w300, w301, w310, w311, b30, b31):
    """Return dC/db11, averaged over ``trainingset``.

    ``b11`` is the bias of n11. n11 reaches each output along two routes
    (through n20 and through n21), so each output error is weighted by
    the sum of both routes.
    """
    params = (w100, w101, w110, w111, b10, b11,
              w200, w201, w210, w211, b20, b21,
              w300, w301, w310, w311, b30, b31)
    total = 0
    for inputs, (t0, t1) in trainingset.items():
        _, (_, n11), (n20, n21), (n30, n31) = ns(*inputs, *params)
        # Route through n20 plus route through n21, per output.
        paths30 = (w300 * n20 * (1 - n20) * w210 * n11 * (1 - n11)
                   + w310 * n21 * (1 - n21) * w211 * n11 * (1 - n11))
        paths31 = (w301 * n20 * (1 - n20) * w210 * n11 * (1 - n11)
                   + w311 * n21 * (1 - n21) * w211 * n11 * (1 - n11))
        total += (n30 - t0) * (n30 * (1 - n30) * paths30)
        total += (n31 - t1) * (n31 * (1 - n31) * paths31)
    return total / len(trainingset)
# Train the network: start from 18 random parameters drawn from [-1, 1)
# and descend until the cost drops below 1e-3. The gradient functions are
# listed in the same order as the parameters of ``cost``.
new_values = grad_desc(
    cost,
    [
        dCdw100, dCdw101, dCdw110, dCdw111, dCdb10, dCdb11,
        dCdw200, dCdw201, dCdw210, dCdw211, dCdb20, dCdb21,
        dCdw300, dCdw301, dCdw310, dCdw311, dCdb30, dCdb31,
    ],
    [2 * random.random() - 1 for _ in range(18)],
    step_size=0.5,
    threshold=1e-3,
)

# Report the final cost and the network output for every training input.
print('cost:', cost(*new_values))
print('output:')
for ((x0, x1), (t0, t1)) in trainingset.items():
    print(' ', (x0, x1), out(x0, x1, *new_values))