Unverified Commit 685795de authored by Lambert Theisen

add projected gradient descent with hosvd projection

parent 4779af07
# TensorComputation.jl
using LinearAlgebra
function matricization(tensor, row_list, col_list = [])
    size_tensor = size(tensor)
    d = length(size_tensor)
    if isempty(col_list)
        col_list = setdiff(1:d, row_list)
    end
    row_size = prod(size_tensor[row_list])
    col_size = prod(size_tensor[col_list])
    matrix = zeros(row_size, col_size)
    row_lin_ind = LinearIndices(size_tensor[row_list])
    col_lin_ind = LinearIndices(size_tensor[col_list])
    for t in CartesianIndices(size_tensor)
        t = Tuple(t)
        row_t_ind = t[row_list]
        col_t_ind = t[col_list]
        row_ind = row_lin_ind[CartesianIndex(row_t_ind)]
        col_ind = col_lin_ind[CartesianIndex(col_t_ind)]
        matrix[row_ind, col_ind] = tensor[CartesianIndex(t)]
    end
    return matrix
end
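# Example sketch: the mode-1 unfolding of a 2x3x4 tensor is a 2x12 matrix
# whose columns enumerate the remaining modes in column-major order.
# T = reshape(1.0:24.0, 2, 3, 4)
# M1 = matricization(T, [1])
# @assert size(M1) == (2, 12) && M1[1, 1] == T[1, 1, 1]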
function multirank(tensor)
    d = length(size(tensor))
    return [rank(matricization(tensor, [i])) for i in 1:d]
end
function mat2tensor(mat, size_tensor, row_list, col_list = [])
    tensor = zeros(size_tensor)
    d = length(size_tensor)
    if isempty(col_list)
        col_list = setdiff(1:d, row_list)
    end
    row_lin_ind = LinearIndices(size_tensor[row_list])
    col_lin_ind = LinearIndices(size_tensor[col_list])
    for t in CartesianIndices(size_tensor)
        t = Tuple(t)
        row_t_ind = t[row_list]
        col_t_ind = t[col_list]
        row_ind = row_lin_ind[CartesianIndex(row_t_ind)]
        col_ind = col_lin_ind[CartesianIndex(col_t_ind)]
        tensor[CartesianIndex(t)] = mat[row_ind, col_ind]
    end
    return tensor
end
function mode_product(tensor, mat, mode)
    size_tensor = size(tensor)
    # Matricize the tensor along `mode` and multiply from the left.
    mode_mat_tensor = matricization(tensor, [mode])
    new_mode_mat = mat * mode_mat_tensor
    # Fold back to the original shape; this requires `mat` to be square
    # (size(tensor, mode) x size(tensor, mode)).
    new_tensor = mat2tensor(new_mode_mat, size_tensor, [mode])
    return new_tensor
end
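# Sanity sketch: multiplying mode 1 by the identity leaves the tensor unchanged.
# T = randn(2, 3, 4)
# @assert mode_product(T, Matrix(1.0I, 2, 2), 1) ≈ T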
function hosvd(tensor)
    size_tensor = size(tensor)
    dim = length(size_tensor)
    result = Dict()
    for mode in 1:dim
        modal_mat = matricization(tensor, [mode])
        U_matrix = svd(modal_mat).U
        result[mode] = U_matrix
        # Project the current tensor onto the mode basis; after the last
        # mode, `tensor` holds the core.
        tensor = mat2tensor(U_matrix' * modal_mat, size_tensor, [mode])
    end
    result["core"] = tensor
    return result
end
function hosvd_reconstruct(hosvd_result)
    core = hosvd_result["core"]
    dim = length(size(core))
    for m in 1:dim
        core = mode_product(core, hosvd_result[m], m)
    end
    return core
end
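# Round-trip sketch: with square factor matrices (e.g. a cubical tensor),
# reconstructing from the full HOSVD reproduces the input up to
# floating-point error.
# T = randn(3, 3, 3)
# @assert hosvd_reconstruct(hosvd(T)) ≈ T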
;
# ThetaNorm.jl
using JuMP, LinearAlgebra, TensorCore, SCS
module NuclearNorm
using JuMP, LinearAlgebra
function set_cons_basis(n::Tuple, model::Model, M)
    cartesian = CartesianIndices(n)
    linear = LinearIndices(n)
    for i in cartesian
        for j in cartesian
            ti, tj = Tuple(i), Tuple(j)
            if linear[tj...] < linear[ti...] || all(ti .<= tj)
                continue
            end
            # Tie together moment-matrix entries that correspond to the
            # same pair of tensor indices.
            @constraint(
                model,
                M[linear[min.(ti, tj)...]+1, linear[max.(ti, tj)...]+1] == M[linear[ti...]+1, linear[tj...]+1]
            )
        end
    end
    @constraint(model, tr(M) == 2*M[1,1])
    return
end
function dual_cons(size_tensor::Tuple, model::Model, M)
    cartesian = CartesianIndices(size_tensor)
    linear = LinearIndices(size_tensor)
    N = prod(size_tensor)
    reduced_pairs = zeros(Int, N+1, N+1)
    k = 1
    for i in cartesian
        for j in cartesian
            ti, tj = Tuple(i), Tuple(j)
            if linear[ti...] < linear[tj...]
                mi = linear[min.(ti, tj)...]
                ma = linear[max.(ti, tj)...]
                if reduced_pairs[mi+1, ma+1] == 0
                    reduced_pairs[mi+1, ma+1] = k
                    k += 1
                end
                reduced_pairs[linear[ti...]+1, linear[tj...]+1] = reduced_pairs[mi+1, ma+1]
            end
        end
    end
    for i in 1:(k-1)
        @constraint(model, sum(M[reduced_pairs .== i]) == 0)
    end
end
end
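# Usage sketch (uses the top-level JuMP/SCS imports): attach the
# nuclear-norm moment constraints to a fresh model.
# model = Model(SCS.Optimizer)
# n = (2, 2)
# @variable(model, M[1:prod(n)+1, 1:prod(n)+1], PSD)
# NuclearNorm.set_cons_basis(n, model, M)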
module L1Norm
using JuMP, LinearAlgebra
function set_cons_basis(n::Tuple, model::Model, M)
    @constraint(model, tr(M) == 2*M[1,1])
    # Zero out the strictly upper-triangular entries of the trailing block.
    N = prod(n)+1
    for i in 2:(N-1)
        for j in (i+1):N
            @constraint(model, M[i,j] == 0)
        end
    end
end
end
# Incorrect construction of Lp norm
module LpNorm
using JuMP, LinearAlgebra
function set_cons_basis(n::Tuple, model::Model, M, p)
    cartesian = CartesianIndices(n)
    linear = LinearIndices(n)
    for i in cartesian
        for j in cartesian
            ti, tj = Tuple(i), Tuple(j)
            if linear[tj...] < linear[ti...] || all(ti .<= tj)
                continue
            end
            @constraint(
                model,
                M[linear[min.(ti, tj)...]+1, linear[max.(ti, tj)...]+1] == M[linear[ti...]+1, linear[tj...]+1]
            )
        end
    end
    @NLconstraint(model, sum(M[i]^(p/2) for i in Iterators.drop(diagind(M), 1)) <= M[1,1])
    return
end
end
module MaxNorm
using JuMP, LinearAlgebra, Random
# For coordinates where a and b agree, these helpers wrap to the last index
# n[i]; e.g. _min((3,3), (1,2), (1,3)) == (3, 2) and _max((3,3), (1,2), (1,3)) == (3, 3).
function _max(n::Tuple, a::Tuple, b::Tuple)
    Tuple(a[i] == b[i] ? n[i] : max(a[i], b[i]) for i in eachindex(a))
end
function _min(n::Tuple, a::Tuple, b::Tuple)
    Tuple(a[i] == b[i] ? n[i] : min(a[i], b[i]) for i in eachindex(a))
end
function set_cons_basis(n::Tuple, model::Model, M)
    cartesian = CartesianIndices(n)
    linear = LinearIndices(n)
    for i in cartesian
        for j in cartesian
            ti, tj = Tuple(i), Tuple(j)
            if linear[tj...] < linear[ti...] || all(ti .< tj)
                continue
            end
            @constraint(
                model,
                M[linear[_min(n, ti, tj)...]+1, linear[_max(n, ti, tj)...]+1] == M[linear[ti...]+1, linear[tj...]+1]
            )
        end
    end
    # All trailing diagonal entries equal M[1,1] (the max-norm constraint).
    @constraint(model, [i=Iterators.drop(diagind(M), 1)], M[1,1] == M[i])
    return
end
end
"""
    create_tensor_CP(size_tensor, coefs, sample="normal", eps=0.0)

Create a low-rank tensor.

Input:
    size_tensor: shape of the tensor.
    coefs: the coefficients of the CP decomposition.
    sample: how the entries of the factors u_i are sampled ("normal" for
        Gaussian, or any sampler accepted by `rand`); the u_i are normalized.
    eps: uniform disturbance added to the factors, default = 0.0.
Output:
    tensor: a tensor of shape `size_tensor` with CP rank at most `length(coefs)`.
"""
function create_tensor_CP(size_tensor, coefs, sample="normal", eps=0.0)
    vec_dict = Dict()
    dim = length(size_tensor)
    rank = length(coefs)
    for i in 1:dim
        if sample == "normal"
            ui = randn(rank, size_tensor[i])
        else
            ui = rand(sample, (rank, size_tensor[i]))
        end
        err = rand(rank, size_tensor[i]) .* (2 * eps) .- eps # disturbance
        ui += err
        normal_ui = (ui' ./ norm(ui', 2))'
        vec_dict[i] = normal_ui
    end
    output_tensor = zeros(size_tensor)
    for i in 1:rank
        # Build the i-th rank-one term u_1 ⊗ u_2 ⊗ ... ⊗ u_dim (TensorCore's ⊗).
        rank_one_tensor = vec_dict[1][i, :]
        for j in 2:dim
            rank_one_tensor = rank_one_tensor ⊗ vec_dict[j][i, :]
        end
        output_tensor .+= rank_one_tensor * coefs[i]
    end
    return output_tensor
end
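# Example sketch: a 3x3x3 tensor of CP rank at most 2, whose multilinear
# rank is then bounded by 2 in every mode (uses multirank from
# TensorComputation.jl).
# x0 = create_tensor_CP((3, 3, 3), randn(2))
# @assert all(multirank(x0) .<= 2)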
function create_measurements(x0, num::Integer, normalize = true)
    if normalize
        A = [randn(length(x0)) for _ in 1:num] ./ sqrt(num)
    else
        A = [randn(length(x0)) for _ in 1:num]
    end
    b = [dot(f, x0) for f in A]
    return A, b
end
function recovery_theta1(size_tensor::Tuple, A, b, p = 2, eps_rel = 1e-8)
    N = prod(size_tensor) + 1
    #println("Establishing Problem....")
    model = Model(SCS.Optimizer)
    set_optimizer_attribute(model, "eps_abs", 1e-6)
    @variable(model, M[1:N, 1:N], PSD)
    @objective(model, Min, M[1,1])
    set_optimizer_attribute(model, "verbose", 1) # show solver output
    set_optimizer_attribute(model, "eps_rel", eps_rel)
    if p == 2
        NuclearNorm.set_cons_basis(size_tensor, model, M)
    elseif p == 1
        L1Norm.set_cons_basis(size_tensor, model, M)
    elseif p == Inf
        MaxNorm.set_cons_basis(size_tensor, model, M)
    else
        error("p must be 1, 2, or Inf!")
    end
    # Clear any stale registered name (a no-op on a fresh model).
    unregister(model, :cons_measurements)
    @constraint(model, cons_measurements[i=1:length(b)], dot(M[1, 2:end], A[i]) == b[i])
    println("Solving Problem....")
    optimize!(model)
    min_val = value(M[1,1])
    x = value.(M[1, 2:end])
    return min_val, x
end
function norm_theta1(x, p = 2, eps_rel = 1e-5)
    size_tensor = size(x)
    N = prod(size_tensor) + 1
    #println("Establishing Problem....")
    model = Model(SCS.Optimizer)
    set_optimizer_attribute(model, "verbose", 0) # suppress solver output
    @variable(model, M[1:N, 1:N], PSD)
    @objective(model, Min, M[1,1])
    set_optimizer_attribute(model, "eps_rel", eps_rel)
    if p == 2
        NuclearNorm.set_cons_basis(size_tensor, model, M)
    elseif p == 1
        L1Norm.set_cons_basis(size_tensor, model, M)
    elseif p == Inf
        MaxNorm.set_cons_basis(size_tensor, model, M)
    else
        error("p must be 1, 2, or Inf!")
    end
    @constraint(model, M[1, 2:end] .== x[1:end])
    #println("Solving Problem....")
    optimize!(model)
    min_val = value(M[1,1])
    return min_val
end
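# Usage sketch: evaluate the theta-1 relaxations of a small random tensor
# (accuracy is limited by the SCS tolerances).
# t = randn(2, 2, 2)
# norm_theta1(t)        # p = 2, nuclear-norm relaxation
# norm_theta1(t, 1)     # p = 1
# norm_theta1(t, Inf)   # p = Inf, max-norm relaxation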
function norm_dual(x, p = 2, eps_rel = 1e-5)
    size_tensor = size(x)
    N = prod(size_tensor) + 1
    model = Model(SCS.Optimizer)
    set_optimizer_attribute(model, "verbose", 0) # suppress solver output
    @variable(model, M[1:N, 1:N], PSD)
    @objective(model, Max, sum(vec(x) .* M[1, 2:end]))
    set_optimizer_attribute(model, "eps_rel", eps_rel)
    if p == 2
        NuclearNorm.set_cons_basis(size_tensor, model, M)
    elseif p == 1
        L1Norm.set_cons_basis(size_tensor, model, M)
    elseif p == Inf
        MaxNorm.set_cons_basis(size_tensor, model, M)
    else
        error("p must be 1, 2, or Inf!")
    end
    @constraint(model, M[1,1] == 1)
    optimize!(model)
    return objective_value(model)
end
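# Duality sanity sketch: for any tensor t, norm duality gives
# dot(t, t) <= norm_theta1(t) * norm_dual(t), up to solver tolerance.
# t = randn(2, 2, 2)
# @assert dot(vec(t), vec(t)) <= norm_theta1(t) * norm_dual(t) + 1e-3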
function gamma_K(x, eps_rel = 1e-5)
    size_tensor = size(x)
    N = prod(size_tensor) + 1
    model = Model(SCS.Optimizer)
    @variable(model, M[1:N, 1:N], PSD)
    @objective(model, Min, M[1,1] + M[2,2])
    set_optimizer_attribute(model, "verbose", 0) # suppress solver output
    set_optimizer_attribute(model, "eps_rel", eps_rel)
    NuclearNorm.dual_cons(size_tensor, model, M)
    # Not strictly necessary: tie all trailing diagonal entries to M[2,2].
    for i in 3:N
        @constraint(model, M[i,i] == M[2,2])
    end
    # Zero out the entries whose index differs from (1,...,1) in exactly
    # one mode (work on a copy to avoid mutating the caller's tensor).
    x = copy(x)
    cartesian = CartesianIndices(size_tensor)
    for i in cartesian
        ti = Tuple(i)
        if sum(ti .!= 1) == 1
            x[i] = 0
        end
    end
    @constraint(model, 2*M[1, 3:end] .== x[2:end])
    @constraint(model, M[1,1] + M[2,2] == 2*M[1,2])
    #@constraint(model, 2*M[1, 2:end] .== x[1:end])
    optimize!(model)
    return objective_value(model)
end
function relative_error(x0, x)
    return norm(x[1:end] - x0[1:end]) / norm(x0[1:end])
end
include("ThetaNorm.jl")
include("TensorComputation.jl")
using Random
Random.seed!(1234) # set the seed for reproducibility
using LinearAlgebra
using Serialization
using SparseTensorMoments
using Test
# Define centers and alphas
centers = [
    [0.5, 0.5, 0.5],
    [-0.5, -0.5, -0.5],
]
alphas_f = [2.5, 2.5]
# Create the function f(x)
function make_f(centers, alphas_f)
    function f(x::Vector{<:Real})
        sum(exp.(-alphas_f .* [norm(x .- centers[i])^2 for i in 1:length(centers)]))
    end
    return f
end
f = make_f(centers, alphas_f)
# Define parameters
n = 5
width = 1.0
max_exponent = 10
# Generate X, Y, A
X, Y, A = generate(f, width, n, max_exponent)
# A_array = [A[i,:] for i in 1:size(A,1)]
# n, x = recovery_theta1(size(X), A_array, Y);
# x = reshape(x, size(X))
# relative_error(X,x)
# hosvd_x = hosvd(x);
# core = hosvd_x["core"]
# multirank(core)
# core
# core[abs.(core).<1e-5] .= 0
# multirank(core)
# hosvd_x["core"] = core
# x_re = hosvd_reconstruct(hosvd_x)
# @assert relative_error(X,x_re) < 1E-2
function projection(X)
    hosvd_x = hosvd(X)
    core_tmp = hosvd_x["core"]
    # core[abs.(core).<1e-5] .= 0
    desired_rank = 2
    # Truncate: keep only the leading desired_rank^3 block of the core,
    # i.e. project onto tensors of multilinear rank at most (2, 2, 2).
    hosvd_x["core"] = zeros(size(X)) # zero core of the same shape as X
    hosvd_x["core"][1:desired_rank, 1:desired_rank, 1:desired_rank] .= core_tmp[1:desired_rank, 1:desired_rank, 1:desired_rank]
    return hosvd_reconstruct(hosvd_x)
end
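# Sketch: the projection caps the multilinear rank at desired_rank = 2.
# xp = projection(randn(5, 5, 5))
# @assert all(multirank(xp) .<= 2)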
function gradient(x)
    # Gradient of the least-squares objective 0.5 * norm(A * vec(x) - Y)^2
    # (A and Y are the globals produced by `generate` above).
    return reshape(A' * (A * vec(x) - Y), size(x))
end
function optimal_gradient_descent(x, alpha, max_iter; atol=1E-10, rtol=1E-7)
    hist = Float64[]
    for i in 1:max_iter
        grad = gradient(x)
        resnorm = norm(grad)
        push!(hist, resnorm)
        relresnorm = resnorm / norm(x)
        @info i, resnorm, relresnorm
        if (resnorm < atol) || (relresnorm < rtol)
            break
        end
        # Exact line search for the quadratic objective:
        # alpha = <s, A*grad> / ||A*grad||^2 minimizes f(x - alpha*grad).
        s = A * vec(x) - Y
        Ad = A * vec(grad)
        alpha = (s' * Ad) / (Ad' * Ad)
        # Projected gradient step: descend, then project back onto the
        # low-multilinear-rank set via the HOSVD truncation.
        x = projection(x - alpha * grad)
    end
    return (; x, hist)
end
res = optimal_gradient_descent(ones(size(X)), 1, 1000);
relative_error(X, res.x)
using Plots # `plot` below assumes Plots.jl
plot(res.hist, yaxis=:log, label="resnorm")
# size_tensor = (3,3,3) # size of the tensor
# r = 1 # rank of the tensor
# coefs = randn(r) # coefficients of rank-1 tensors
# x0 = create_tensor_CP(size_tensor, coefs, "normal"); # create a rank-1 tensor by gaussian distribution
# m = 3^6-1 # number of measurements
# A, b = create_measurements(x0, m); # create gaussian linear measurements
# # A should be a list of length m, each element is a flattened tensor, i.e. a vector of size 3*3*3=27.
# # b is also a list of length m, each element is the number b_i = <A_i, x0>.
# n, x = recovery_theta1(size_tensor, A, b); # using nuclear-2 theta-1 norm minimization for recovery
# # n is the theta-1 norm, x is the recovered tensor
# x = reshape(x, size(x0)) # reshape the recovered tensor to its original shape
# relative_error(x0,x) # compare the difference