# Analysis.jl
Base.include(Main,"SymGrpAndReps.jl")
using GRUtils
using StatsBase
using Statistics
# Load the plasma measurements. Each data row is stripped of double quotes and
# split on commas; only the first four fields are used: patient RID, visit code,
# AB40 and AB42.
obs = readlines("UPENNPLASMA.csv")

# Visit code -> numeric time point ("bl" maps to 0; the others presumably count
# months since baseline).
VCmap = Dict{String,Int8}(
    "bl" => 0, "m06" => 6, "m12" => 12, "m18" => 18, "m24" => 24, "m36" => 36, "m48" => 48,
)

# The first line is skipped (presumably the CSV header).
parsedObs = map(obs[2:end]) do line
    fields = split(replace(line, '"' => ""), ",")[1:4]
    rid = parse(Int16, fields[1])
    visit = VCmap[fields[2]]  # KeyError for a visit code missing from VCmap
    # An empty measurement field is recorded as the sentinel -1.
    ab40 = isempty(fields[3]) ? -1 : parse(Float64, fields[3])
    ab42 = isempty(fields[4]) ? -1 : parse(Float64, fields[4])
    return (rid, visit, ab40, ab42)
end

# Keep only the rows in which no field equals the sentinel -1.
cleanObs = filter(rec -> all(v -> v != -1, rec), parsedObs)
# Distinct patient identifiers present in the cleaned observations, ascending.
RIDs = sort(unique(first.(cleanObs)))
N = length(RIDs)

# One entry per patient with more than one observation:
# (RID, visit times, AB40 values, AB42 values, AB42 ./ AB40), all in file order.
byPat = []
for rid in RIDs
    rows = filter(rec -> rec[1] == rid, cleanObs)
    # Patients with exactly one observation are left out.
    length(rows) == 1 && continue
    visits = [rec[2] for rec in rows]
    ab40 = [rec[3] for rec in rows]
    ab42 = [rec[4] for rec in rows]
    push!(byPat, (rid, visits, ab40, ab42, ab42 ./ ab40))
end
# Restrict to patients whose visit times are exactly 0, 12, 24 and 36.
Samp = filter(p -> p[2] == [0, 12, 24, 36], byPat)
n = length(Samp)
sRIDs = [Int(p[1]) for p in Samp]

# Diagnosis codes recorded for each sampled patient, read from the diagnosis
# file: field 3 is matched against the RID and field 12 is stored as the code.
DX = Dict{Int,Array{Int}}(rid => [] for rid in sRIDs)
info = readlines("DXSUM_PDXCONV_ADNIALL.csv")
for row in info[2:end]
    cols = split(replace(row, '"' => ""), ",")
    rid = parse(Int, cols[3])
    # DX has exactly the sampled RIDs as keys, so `haskey` is the same
    # membership test as `rid in sRIDs`. Rows with an empty code are ignored.
    if haskey(DX, rid) && !isempty(cols[12])
        push!(DX[rid], parse(Int, cols[12]))
    end
end
# Keep each distinct code once per patient, in order of first appearance.
foreach(unique!, values(DX))
# Empirical joint distributions over the sampled patients.
# Diagnosis codes: 3=AD, 2=MCI, 1=NL
#   G[g, v, a40, a42] and Gr[g, v, r]:
#       g is the largest diagnosis code recorded for the patient.
#   D[f1, f2, f3, v, a40, a42] and Dr[f1, f2, f3, v, r]:
#       f_c is 2 when code c was recorded for the patient and 1 otherwise.
# v is the visit index (1:4); a40 and a42 are the measurements rounded to the
# nearest integer and r is 100*AB42/AB40 rounded. The bin counts 372, 103 and
# 134 are fixed here; a value that rounds to 0 or exceeds its bin count raises
# a BoundsError.
G = zeros((3,4,372,103));
Gr = zeros((3,4,134));
D = zeros((2,2,2,4,372,103));
Dr = zeros((2,2,2,4,134));
for p in Samp
    codes = DX[p[1]]
    # `maximum` of an empty collection fails without saying which patient is
    # affected, so report the RID explicitly.
    isempty(codes) && error("no diagnosis code recorded for RID $(p[1])")
    f1 = Int(1 in codes) + 1
    f2 = Int(2 in codes) + 1
    f3 = Int(3 in codes) + 1
    top = maximum(codes)
    for v in 1:4
        a40 = round(Int, p[3][v])
        a42 = round(Int, p[4][v])
        r = round(Int, p[4][v] / p[3][v] * 100)
        # Each of the four visits carries a quarter of the patient's unit mass.
        D[f1, f2, f3, v, a40, a42] += 1/4
        Dr[f1, f2, f3, v, r] += 1/4
        G[top, v, a40, a42] += 1/4
        Gr[top, v, r] += 1/4
    end
end
# Divide by the number of sampled patients, so each array sums to 1.
D/=n;
Dr/=n;
G/= n;
Gr/=n;
# Visit-to-visit change of D and Dr: slice i along the visit axis (dimension 4)
# holds visit i+1 minus visit i. The result shapes follow D and Dr instead of
# restating their sizes as literals.
d = D[:,:,:,2:end,:,:] - D[:,:,:,1:end-1,:,:];
dr = Dr[:,:,:,2:end,:] - Dr[:,:,:,1:end-1,:];
"""
    F(x::Array)

Return the discrete Fourier sums of `x` as a vector of `length(x)` entries.

Entry `k` is `sum(x[j] * exp(-2πi * k * (j-1) / N))` over `j = 1:N`, with
`N = length(x)` and `x` indexed linearly. Because `k` runs over `1:N`, the last
entry (`k == N`) is the plain sum of `x`.
"""
function F(x::Array)
    len = length(x)
    return map(1:len) do freq
        terms = [x[pos]*exp(-im*2*π*freq*(pos-1)/len) for pos in 1:len]
        sum(terms)
    end
end
"""
    A ⊗ B

Outer product of two numeric arrays with a common element type. The result has
shape `(size(A)..., size(B)...)` and holds every product of one entry of `A`
with one entry of `B`.
"""
⊗(A::Array{T}, B::Array{T}) where {T<:Number} = [a * b for a in A, b in B]

"""
    E(X::Array, i::Integer)
    E(X::Array, I::NTuple)

Marginal of `X` over the listed dimensions: sum `X` over every dimension that
is not listed and drop the summed-out dimensions.

The kept dimensions appear in the order they have in `X`, whatever order they
are listed in.
"""
E(X::Array, i::K) where {K<:Integer} = E(X, (i,))
function E(X::Array, I::NTuple)
    summed = tuple(setdiff(1:ndims(X), I)...)
    return dropdims(sum(X, dims=summed), dims=summed)
end

"""
    cov(X::Array, i, j)

Joint marginal of `X` over the dimensions in `i` and `j` minus the outer
product of the two separate marginals. `i` and `j` may each be a single
dimension or a tuple of dimensions.

Only the method taking two single dimensions rescales the outer product by its
total; the tuple methods subtract it as is, so the methods agree when `X` sums
to 1. NOTE(review): confirm whether the tuple methods should rescale as well.

List the dimensions in increasing order with all of `i` before `j`: `E` keeps
the dimension order of `X`, while `⊗` orders its result as `(i..., j...)`.

NOTE(review): with `using Statistics` in effect this unqualified definition
does not add methods to `Statistics.cov`; confirm the shadowing is intended.
"""
function cov(X::Array, i::K, j::K) where {K<:Integer}
    indep = E(X, i) ⊗ E(X, j)
    return E(X, (i, j)) - indep / sum(indep)
end
cov(X::Array, I::NTuple, j::K) where {K<:Integer} = E(X, (I..., j)) - E(X, I) ⊗ E(X, j)
cov(X::Array, i::K, J::NTuple) where {K<:Integer} = E(X, (i, J...)) - E(X, i) ⊗ E(X, J)
# Both marginals must be taken from X; calling E with the index tuple alone has
# no matching method.
cov(X::Array, I::NTuple, J::NTuple) = E(X, (I..., J...)) - E(X, I) ⊗ E(X, J)
# Per-patient series gathered from Samp: visit times, AB40, AB42 and the
# AB42/AB40 ratio (entries 2 to 5 of each sample tuple).
data = let column = k -> [p[k] for p in Samp]
    (t = column(2), AB40 = column(3), AB42 = column(4), ABratio = column(5))
end
# Average of D over the reorderings of its first three axes given by Sym(3);
# axes 4 to 6 stay in place. The weight 1/6 presumes Sym(3) yields six
# permutations (confirm against SymGrpAndReps.jl).
U = sum([permutedims(D, vcat(σ, [4, 5, 6])) * (1/6) for σ in Sym(3)]);
# What remains of D after removing that symmetrized part.
A = D .- U;
# Group-dependence statistic for the histogram G, then for the ratio histogram Gr.
# c[i] is the total mass of diagnosis group i (marginal of G over its first axis).
c = E(G,1);
# G with the group axis summed out.
G_u1 = sum([G[i,:,:,:] for i in 1:3]);
# Sum over the groups of (group slice minus c[i] times the pooled array).
# NOTE(review): algebraically this equals G_u1*(1 - sum(c)); if G sums to 1 it
# is zero up to rounding, which makes k ≈ 0 and the value below ≈ 1. Confirm
# whether abs was meant to be applied per group before summing.
G_a = sum([G[i,:,:,:]- c[i]*G_u1 for i in 1:3]);
k = sum(1/2*abs.(G_a))
# Bare expression: its value is only shown when evaluated interactively.
1/ (1+k)
# Same computation for Gr, reusing the group masses c computed from G.
Gr_u1 = sum([Gr[i,:,:] for i in 1:3]);
Gr_a = sum([Gr[i,:,:]- c[i]*Gr_u1 for i in 1:3]);
k_r = sum(1/2*abs.(Gr_a))
1/ (1+k_r)
# Visit-dependence statistic restricted to diagnosis group 3
# (AD in the code legend 3=AD, 2=MCI, 1=NL).
Alz = G[3,:,:,:];
# Group-3 histogram with the visit axis summed out.
Alz_ut = sum([Alz[t,:,:] for t in 1:4]);
# NOTE(review): algebraically this sum equals -3*Alz_ut, because the pooled
# array is subtracted once per visit without a visit weight. Confirm intent.
Alz_a = sum([Alz[t,:,:] - Alz_ut for t in 1:4]);
k_alz = sum(1/2*abs.(Alz_a))
# Bare expression: its value is only shown when evaluated interactively.
1/ (1+k_alz)
# Same statistic for the AB42/AB40 ratio histogram. It uses group 3 so that it
# describes the same patients as Alz.
Alzr = Gr[3,:,:];
Alzr_ut = sum([Alzr[t,:] for t in 1:4]);
Alzr_a = sum([Alzr[t,:] - Alzr_ut for t in 1:4]);
k_alzr = sum(1/2*abs.(Alzr_a))
1/ (1+k_alzr)
# Nucleotide string (appears to consist solely of the letters A, C, G and T).
# The literal spans two source lines, so the value contains one embedded newline.
# It is not used by any statement above it.
# NOTE(review): presumably a reference sequence related to this analysis; the
# file does not say which one. Confirm and record its source/accession.
s = "AGTTTCCTCGGCAGCGGTAGGCGAGAGCACGCGGAGGAGCGTGCGCGGGGGCCCCGGGAGACGGCGGCGGTGGCGGCGCGGGCAGAGCAAGGACGCGGCGGATCCCACTCGCACAGCAGCGCACTCGGTGCCCCGCGCAGGGTCGCGATGCTGCCCGGTTTGGCACTGCTCCTGCTGGCCGCCTGGACGGCTCGGGCGCTGGAGGTACCCACTGATGGTAATGCTGGCCTGCTGGCTGAACCCCAGATTGCCATGTTCTGTGGCAGACTGAACATGCACATGAATGTCCAGAATGGGAAGTGGGATTCAGATCCATCAGGGACCAAAACCTGCATTGATACCAAGGAAGGCATCCTGCAGTATTGCCAAGAAGTCTACCCTGAACTGCAGATCACCAATGTGGTAGAAGCCAACCAACCAGTGACCATCCAGAACTGGTGCAAGCGGGGCCGCAAGCAGTGCAAGACCCATCCCCACTTTGTGATTCCCTACCGCTGCTTAGTTGGTGAGTTTGTAAGTGATGCCCTTCTCGTTCCTGACAAGTGCAAATTCTTACACCAGGAGAGGATGGATGTTTGCGAAACTCATCTTCACTGGCACACCGTCGCCAAAGAGACATGCAGTGAGAAGAGTACCAACTTGCATGACTACGGCATGTTGCTGCCCTGCGGAATTGACAAGTTCCGAGGGGTAGAGTTTGTGTGTTGCCCACTGGCTGAAGAAAGTGACAATGTGGATTCTGCTGATGCGGAGGAGGATGACTCGGATGTCTGGTGGGGCGGAGCAGACACAGACTATGCAGATGGGAGTGAAGACAAAGTAGTAGAAGTAGCAGAGGAGGAAGAAGTGGCTGAGGTGGAAGAAGAAGAAGCCGATGATGACGAGGACGATGAGGATGGTGATGAGGTAGAGGAAGAGGCTGAGGAACCCTACGAAGAAGCCACAGAGAGAACCACCAGCATTGCCACCACCACCACCACCACCACAGAGTCTGTGGAAGAGGTGGTTCGAGTTCCTACAACAGCAGCCAGTACCCCTGATGCCGTTGACAAGTATCTCGAGACACCTGGGGATGAGAATGAACATGCCCATTTCCAGAAAGCCAAAGAGAGGCTTGAGGCCAAGCACCGAGAGAGAATGTCCCAGGTCATGAGAGAATGGGAAGAGGCAGAACGTCAAGCAAAGAACTTGCCTAAAGCTGATAAGAAGGCAGTTATCCAGCATTTCCAGGAGAAAGTGGAATCTTTGGAACAGGAAGCAGCCAACGAGAGACAGCAGCTGGTGGAGACACACATGGCCAGAGTGGAAGCCATGCTCAATGACCGCCGCCGCCTGGCCCTGGAGAACTACATCACCGCTCTGCAGGCTGTTCCTCCTCGGCCTCGTCACGTGTTCAATATGCTAAAGAAGTATGTCCGCGCAGAACAGAAGGACAGACAGCACACCCTAAAGCATTTCGAGCATGTGCGCATGGTGGATCCCAAGAAAGCCGCTCAGATCCGGTCCCAGGTTATGACACACCTCCGTGTGATTTATGAGCGCATGAATCAGTCTCTCTCCCTGCTCTACAACGTGCCTGCAGTGGCCGAGGAGATTCAGGATGAAGTTGATGAGCTGCTTCAGAAAGAGCAAAACTATTCAGATGACGTCTTGGCCAACATGATTAGTGAACCAAGGATCAGTTACGGAAACGATGCTCTCATGCCATCTTTGACCGAAACGAAAACCACCGTGGAGCTCCTTCCCGTGAATGGAGAGTTCAGCCTGGACGATCTCCAGCCGTGGCATTCTTTTGGGGCTGACTCTGTGCCAGCCAACACAGAAAACGAAGTTGAGCCTGTTGATGCCCGCCCTGCTGCCGACCGAGGACTGACCACTCGACCAGGTTCTGGGTTGACAAATATCAAGACGGAGGAGATCTCTGAAGTGAAGATGGATGCAGAATTCCGACATGACTCAGGATATGAAGTTCATCATCAAAAATTGGTGTTCTTT
GCAGAAGATGTGGGTTCAAACAAAGGTGCAATCATTGGACTCATGGTGGGCGGTGTTGTCATAGCGACAGTGATCGTCATCACCTTGGTGATGCTGAAGAAGAAACAGTACACATCCATTCATCATGGTGTGGTGGAGGTTGACGCCGCTGTCACCCCAGAGGAGCGCCACCTGTCCAAGATGCAGCAGAACGGCTACGAAAATCCAACCTACAAGTTCTTTGAGCAGATGCAGAACTAGACCCCCGCCACAGCAGCCTCTGAAGTTGGACAGCAAAACCATTGCTTCACTACCCATCGGTGTCCATTTATAGAATAATGTGGGAAGAAACAAACCCGTTTTATGATTTACTCATTATCGCCTTTTGACAGCTGTGCTGTAACACAAGTAGATGCCTGAACTTGAATTAATCCACACATCAGTAATGTATTCTATCTCTCTTTACATTTTGGTCTCTATACTACATTATTAATGGGTTTTGTGTACTGTAAAGAATTTAGCTGTATCAAACTAGTGCATGAATAGATTCTCTCCTGATTATTTATCACATAGCCCCTTAGCCAGTTGTATATTATTCTTGTGGTTTGTGACCCAATTAAGTCCTACTTTACATATGCTTTAAGAATCGATGGGGGATGCTTCATGTGAACGTGGGAGTTCAGCTGCTTCTCTTGCCTAAGTATTCCTTTCCTGATCACTATGCATTTTAAAGTTAAACATTTTTAAGTATTTCAGATGCTTTAGAGAGATTTTTTTTCCATGACTGCATTTTACTGTACAGATTGCTGCTTCTGCTATATTTGTGATATAGGAATTAAGAGGATACACACGTTTGTTTCTTCGTGCCTGTTTTATGTGCACACATTAGGCATTGAGACTTCAAGCTTTTCTTTTTTTGTCCACGTATCTTTGGGTCTTTGATAAAGAAAAGAATCCCTGTTCATTGTAAGCACTTTTACGGGGCGGGTGGGGAGGGGTGCTCTGCTGGTCTTCAATTACCAAGAATTCTCCAAAACAATTTTCTGCAGGATGATTGTACAGAATCATTGCTTATGACATGATCGCTTTCTACACTGTATTACATAAATAAATTAAATAAAATAACCCCGGGCAAGACTTTTCTTTGAAGGATGACTACAGACATTAAATAATCGAAGTAATTTTGGGTGGGGAGAAGAGGCAGATTCAATTTTCTTTAACCAGTCTGAAGTTTCATTTATGATACAAAAGAAGATGAAAATGGAAGTGGCAATATAAGGGGATGAGGAAGGCATGCCTGGACAAACCCTTCTTTTAAGATGTGTCTTCAATTTGTATAAAATGGTGTTTTCATGTAAATAAATACATTCTTGGAGGAGC"