# Logistic Regression in Julia

#############################
using Gadfly
using Distributions
#############################
## Linearly separable classes: two 2-D Gaussian blobs.
n = 40
sigma = 5
d1 = Normal(0, sigma)    # class 0 is centered at (0, 0)
d2 = Normal(20, sigma)   # class 1 is centered at (20, 20)
# class 0: n points with label 0 in the third column
x1 = rand(d1,n)
x2 = rand(d1,n)
samples = [x1 x2 zeros(n)]
# class 1: n points with label 1 in the third column
x1 = rand(d2,n)
x2 = rand(d2,n)
# BUG FIX: the original wrote `[x; x1 x2 ones(n)]`, but `x` is undefined;
# the new rows must be appended to the previously built `samples` matrix.
samples = [samples; x1 x2 ones(n)]

# Scatter plot of both classes together.
plot(layer(x=samples[:,1],y=samples[:,2],Geom.point))



#############################
# Maximum number of gradient-descent iterations. Declared `const` so the
# global is type-stable when read from inside `gradient_descent`.
const MAX_EPISODES = 500

# Mean negative log-likelihood (cross-entropy) of the logistic model
# `theta` over `examples`, an iterable of (features, label) pairs with
# labels in {0, 1}. Lower is better; 0 means a perfect fit.
function cost(examples, theta)
    total = 0
    for (x, y) in examples
        h = hypothesis(x, theta)
        total += y * log(h) + (1 - y) * log(1 - h)
    end
    -total / length(examples)
end

# Batch gradient descent on the logistic cost.
#
# Starting from `theta`, repeatedly steps against the gradient until the
# cost drops to 0.005 or below, or MAX_EPISODES iterations have run.
# Prints progress each episode and returns the final parameter vector.
function gradient_descent(examples, theta; learning_rate=0.05)
    err = 10000
    for i in 0:(MAX_EPISODES - 1)
        err > 0.005 || break
        println("Episode $i")
        theta = theta - learning_rate * gradient(examples, theta)
        println("theta: $theta")
        err = cost(examples, theta)
        println("cost: $err")
    end
    theta
end

# Gradient of the logistic cost at `theta`: the (h(x) - y)-weighted
# feature vectors, averaged over all (x, y) examples.
function gradient(examples, theta)
    total = zeros(size(theta))
    for (x, y) in examples
        total = total + (hypothesis(x, theta) - y) * hypothesis_features(x)
    end
    total / length(examples)
end

# Logistic (sigmoid) function: maps any real x into the open interval (0, 1).
function sigmoid(x)
    inv(1 + exp(-x))
end

# Model prediction: P(y = 1 | x) under the logistic model with
# parameters `theta`, using the bias-augmented feature map.
function hypothesis(x, theta)
    activation = hypothesis_features(x)' * theta
    sigmoid(first(activation))
end

# Plot the results: the fitted decision boundary (red) over the sample scatter.
# NOTE(review): reads the globals `samples` and `theta`; the boundary formula
# assumes `theta` has exactly 3 components (bias + 2 weights) — confirm
# against the driver at the bottom of the file.
function plot_results()
    # Predicted probability for every sample.
    # NOTE(review): `h_` is computed but not used by the plot below —
    # presumably left over from the commented-out plot; verify before removing.
    h_ = zeros(size(samples,1))
    for i in 1:size(samples,1)
        h_[i] = hypothesis(samples[i,1:2]', theta)
    end
    #x = -10:0.5:30
    #y = [hypothesis(x_, theta) for x_ in x]
    # Decision boundary: the x2 value where theta' * [1, x1, x2] == 0,
    # evaluated over a fixed grid of x1 values.
    x1 = -10:0.5:30
    x2 = [hypothesis_curve(x_, theta) for x_ in x1]
    #plot(layer(x=x1, y=x2, Geom.point, Theme(default_color=color("red"))))
    # Boundary points in red, raw samples in the default color.
    plot(layer(x=x1, y=x2, Geom.point, Theme(default_color=color("red"))),
        layer(x=samples[:,1],y=samples[:,2],Geom.point))
    
    # hypothesis in red
#     plot(layer(x=x, y=h_, Geom.point, Theme(default_color=color("red"))),
#     layer(x=x, y=y, Geom.point),
#     layer(x=x, y=ysample, Geom.point, Theme(default_color=color("black"))))
    
end

#############################
# Feature map for the linear hypothesis: prepend the constant bias term 1.
function hypothesis_features(x)
    vcat(1, x)
end
# Decision boundary of the linear hypothesis: solve
# theta[1] + theta[2]*x1 + theta[3]*x2 = 0 for x2 given x1.
function hypothesis_curve(x1, theta)
    (-theta[1] - theta[2] * x1) / theta[3]
end
# Initial parameters: small nonzero values for (bias, w1, w2).
theta_ini = ones(3) * 0.01
# Pair each sample's two features with its label from column 3.
# NOTE(review): `samples[i,1:2]'` takes an adjoint of the feature pair;
# confirm `hypothesis_features` accepts this orientation on the Julia
# version this was written for.
dataset = [(samples[i,1:2]',samples[i,3]) for i in 1:size(samples,1)]
# Fit the model (learning rate 0.1 overrides the 0.05 default).
theta = gradient_descent(dataset, theta_ini, learning_rate=0.1)
println("Theta $theta")
plot_results()