StartingPoint RecentChanges

dt c45 disc3 r2 region montage

No hv

Shell:

# Work inside dt/; -p keeps this step re-runnable when dt/ already exists.
mkdir -p dt
cd dt

geneListData=try8_gs_2200_10000_100_1100_500_500
data=try8_right_gs_2200_10000_100_1100_500_500
minArea=1
maxArea=49

# First line of the .names file: the comma-separated list of class values.
# The trailing "\n" is kept exactly as in the original command (whether echo
# expands it depends on the shell).
# NOTE(review): the list repeats 33 and 44 and omits 32 and 42, which looks
# like a typo. It is left untouched because any later step that matches this
# header literally has to be corrected together with it.
classHeader="0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,33,33,34,35,36,37,38,39,40,41,44,43,44,45,46,47,48,49.\n"

# Header first, then one "<name>_<row number>: continuous." line per row of
# the gene list (nl supplies the row number as the first field).
{
  echo "$classHeader"
  nl -n rn "../gene_list_$geneListData.txt" | awk '{print $2 "_" $1 ": continuous."}'
} > c45.names

# The intermediate c45.names.1 is no longer created; clear any stale copy.
rm -f c45.names.1

cp c45.names "c45_disc3_${data}.names.1"

Octave:

data='try8_right_gs_2200_10000_100_1100_500_500';
origData='try8_right_gs_2200_10000_100_1100_500_500';

% Read the unfiltered .names file into a cell array, one entry per line
% (fgets keeps each line terminator).
namesIn = ['c45_disc3_' data '.names.1'];
f = fopen(namesIn,'r');
if f < 0
  error('could not open %s for reading', namesIn);
end
lines = {};
while true
  txt = fgets(f,99999);
  if ~ischar(txt)
    % fgets returns -1 at end of file. The sentinel must not be stored,
    % otherwise it can be written to the output as a bogus final entry.
    break
  end
  lines{end+1} = txt;
end
fclose(f);

% The filter below relies on this file defining low_variance_genes.
load(['../data_' origData '_start2'])

% Keep line i unless i-2 is listed in low_variance_genes.
% NOTE(review): the "With hv" variant of this step tests i-1 instead; confirm
% which offset matches the header layout of the .names.1 file.
lines2 = {};
for i = 1:numel(lines)
  if ~ismember(i-2, low_variance_genes)
    lines2{end+1} = lines{i};
  end
end

% Write the surviving lines back out unchanged.
namesOut = ['c45_disc3_' data '.names'];
f = fopen(namesOut,'w');
if f < 0
  error('could not open %s for writing', namesOut);
end
for i = 1:numel(lines2)
  fprintf(f, '%s', lines2{i});
end
fclose(f);




% Pass D3 through make_3_discretization and hand the result, together with
% paint3, to make_c45_dataset under the target .data file name.
% NOTE(review): D3 and paint3 are not defined in this snippet; presumably they
% come from the loaded workspace(s) - confirm.
load('../paint');
[D3_3_disc, D3_3_disc_middle_thresh_mins, D3_3_disc_middle_thresh_maxs] = ...
    make_3_discretization(D3);
c45DataFile = 'c45_disc3_try8_right_gs_2200_10000_100_1100_500_500.data';
make_c45_dataset(D3_3_disc, paint3, c45DataFile);


Shell:

# Create one .names file per region with a binary class header.
# The header is matched generically (a comma-separated run of integers ending
# in "." at the start of line 1) rather than by spelling out the whole class
# list, so the step keeps working if that list is ever corrected upstream.
# The second substitution drops a literal backslash-n left behind by echo.
for (( i = $minArea; i <= $maxArea; i++ )); do
  regionNames="c45_disc3_${data}_region$i.names"
  cp "c45_disc3_${data}.names" "$regionNames"

  perl -pi -e 's/^\d+(?:,\d+)*\./0,1./ if $. == 1; s/\\n//;' "$regionNames"
done

# Create one .data file per region in which the trailing field is rewritten:
# rows ending in ", <i>" end in ", 1", rows ending in any other integer end
# in ", 0". YES/NO are temporary markers so that a freshly written 1 or 0 is
# not re-matched by the numeric pattern. All four substitutions run per line
# in a single perl pass, in the same order as before.
for (( i = $minArea; i <= $maxArea; i++ )); do
  regionData="c45_disc3_${data}_region$i.data"
  cp "c45_disc3_${data}.data" "$regionData"

  region="$i" perl -pi -e '
    s/, $ENV{region}$/, YES/g;
    s/, \d+$/, NO/g;
    s/, YES$/, 1/g;
    s/, NO$/, 0/g;
  ' "$regionData"
done



# For every region, run c4.5 and c4.5rules twice: once with only -f/-v
# (outputs .dt1 and .r) and once with extra -c / -F arguments (outputs .dt12
# and .r2). tee shows each tool's output and saves a copy.
# NOTE(review): the second c4.5 call builds its -c argument with plain echo,
# while c4.5rules uses echo -e, so in bash the two receive different text.
# Both forms are reproduced as written; confirm which one is intended.
for (( i = $minArea; i <= $maxArea; i++ )); do
  echo "$i"
  stem="c45_disc3_${data}_region$i"

  c4.5 -f "$stem" -v 1 | tee "$stem.dt1"
  c4.5rules -f "$stem" | tee "$stem.r"

  c4.5 -c"$(echo "\x01")" -f "$stem" -v 1 | tee "$stem.dt12"
  c4.5rules -F"$(echo -e "\x01")" -c"$(echo -e "\x01")" -f "$stem" | tee "$stem.r2"
done

Octave:

% Call visualize_c45rules on the per-region .r2 rule files. The '%d' in the
% last argument is presumably filled in with the region number by the callee.
ruleFilePattern = ['c45_disc3_' data '_region%d.r2'];
visualize_c45rules(['../data_' origData '_start2'], '../paint.mat', '../paint_names.txt', 'D3', ruleFilePattern)


With hv

Shell:

# Work inside dt/; -p keeps this step re-runnable when dt/ already exists.
mkdir -p dt
cd dt

geneListData=try8_gs_2200_10000_100_1100_500_500
data=try8_right_gs_2200_10000_100_1100_500_500_hv
minArea=1
maxArea=49

# First line of the .names file: the comma-separated list of class values.
# The trailing "\n" is kept exactly as in the original command (whether echo
# expands it depends on the shell).
# NOTE(review): the list repeats 33 and 44 and omits 32 and 42, which looks
# like a typo. It is left untouched because any later step that matches this
# header literally has to be corrected together with it.
classHeader="0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,33,33,34,35,36,37,38,39,40,41,44,43,44,45,46,47,48,49.\n"

# Header first, then one "<name>_<row number>: continuous." line per row of
# the gene list (nl supplies the row number as the first field).
{
  echo "$classHeader"
  nl -n rn "../gene_list_$geneListData.txt" | awk '{print $2 "_" $1 ": continuous."}'
} > c45.names

# The intermediate c45.names.1 is no longer created; clear any stale copy.
rm -f c45.names.1

cp c45.names "c45_disc3_${data}.names.1"

Octave:

data='try8_right_gs_2200_10000_100_1100_500_500_hv';
origData='try8_right_gs_2200_10000_100_1100_500_500';

% Read the unfiltered .names file into a cell array, one entry per line
% (fgets keeps each line terminator).
namesIn = ['c45_disc3_' data '.names.1'];
f = fopen(namesIn,'r');
if f < 0
  error('could not open %s for reading', namesIn);
end
lines = {};
while true
  txt = fgets(f,99999);
  if ~ischar(txt)
    % fgets returns -1 at end of file. The sentinel must not be stored,
    % otherwise it can be written to the output as a bogus final entry.
    break
  end
  lines{end+1} = txt;
end
fclose(f);

% The filter below relies on this file defining low_variance_genes.
load(['../data_' origData '_start2'])

% Keep line i unless i-1 is listed in low_variance_genes.
% NOTE(review): the "No hv" variant of this step tests i-2 instead; confirm
% which offset matches the header layout of the .names.1 file.
lines2 = {};
for i = 1:numel(lines)
  if ~ismember(i-1, low_variance_genes)
    lines2{end+1} = lines{i};
  end
end

% Write the surviving lines back out unchanged.
namesOut = ['c45_disc3_' data '.names'];
f = fopen(namesOut,'w');
if f < 0
  error('could not open %s for writing', namesOut);
end
for i = 1:numel(lines2)
  fprintf(f, '%s', lines2{i});
end
fclose(f);

Shell:

# Create one .names file per region with a binary class header.
# The header is matched generically (a comma-separated run of integers ending
# in "." at the start of line 1) rather than by spelling out the whole class
# list, so the step keeps working if that list is ever corrected upstream.
# The second substitution drops a literal backslash-n left behind by echo.
for (( i = $minArea; i <= $maxArea; i++ )); do
  regionNames="c45_disc3_${data}_region$i.names"
  cp "c45_disc3_${data}.names" "$regionNames"

  perl -pi -e 's/^\d+(?:,\d+)*\./0,1./ if $. == 1; s/\\n//;' "$regionNames"
done

Octave:

% Pass D3_hv through make_3_discretization and hand the result, together with
% paint3, to make_c45_dataset under the target .data file name.
% The third output is now named D3_hv_3_disc_middle_thresh_maxs so that it
% matches the other two outputs and no longer overwrites the non-hv variable
% D3_3_disc_middle_thresh_maxs.
% NOTE(review): D3_hv and paint3 are not defined in this snippet; presumably
% they come from the loaded workspace(s) - confirm.
load('../paint');
[D3_hv_3_disc, D3_hv_3_disc_middle_thresh_mins, D3_hv_3_disc_middle_thresh_maxs] = ...
    make_3_discretization(D3_hv);
make_c45_dataset(D3_hv_3_disc, paint3, 'c45_disc3_try8_right_gs_2200_10000_100_1100_500_500_hv.data');

Shell:

# Create one .data file per region in which the trailing field is rewritten:
# rows ending in ", <i>" end in ", 1", rows ending in any other integer end
# in ", 0". YES/NO are temporary markers so that a freshly written 1 or 0 is
# not re-matched by the numeric pattern. All four substitutions run per line
# in a single perl pass, in the same order as before.
for (( i = $minArea; i <= $maxArea; i++ )); do
  regionData="c45_disc3_${data}_region$i.data"
  cp "c45_disc3_${data}.data" "$regionData"

  region="$i" perl -pi -e '
    s/, $ENV{region}$/, YES/g;
    s/, \d+$/, NO/g;
    s/, YES$/, 1/g;
    s/, NO$/, 0/g;
  ' "$regionData"
done



# For every region, run c4.5 and c4.5rules twice: once with only -f/-v
# (outputs .dt1 and .r) and once with extra -c / -F arguments (outputs .dt12
# and .r2). tee shows each tool's output and saves a copy.
# NOTE(review): the second c4.5 call builds its -c argument with plain echo,
# while c4.5rules uses echo -e, so in bash the two receive different text.
# Both forms are reproduced as written; confirm which one is intended.
for (( i = $minArea; i <= $maxArea; i++ )); do
  echo "$i"
  stem="c45_disc3_${data}_region$i"

  c4.5 -f "$stem" -v 1 | tee "$stem.dt1"
  c4.5rules -f "$stem" | tee "$stem.r"

  c4.5 -c"$(echo "\x01")" -f "$stem" -v 1 | tee "$stem.dt12"
  c4.5rules -F"$(echo -e "\x01")" -c"$(echo -e "\x01")" -f "$stem" | tee "$stem.r2"
done

Octave:

% Call visualize_c45rules on the per-region .r2 rule files. The '%d' in the
% last argument is presumably filled in with the region number by the callee.
ruleFilePattern = ['c45_disc3_' data '_region%d.r2'];
visualize_c45rules(['../data_' origData '_start2'], '../paint.mat', '../paint_names.txt', 'D3', ruleFilePattern)


Note that we pass D3, rather than D3_hv, to visualize_c45rules (and to visualize_c45rule below), because the gene numbers have already been mapped back to the D3 numbering.

ACA

data='try8_right_gs_2200_10000_100_1100_500_500_hv';
origData='data_try8_right_gs_2200_10000_100_1100_500_500_start2.mat';

% Move into dt/ACAd (created here). The relative paths below are written from
% that directory: '../' is dt, '../../' is its parent.
cd dt
mkdir ACAd
cd ACAd

% Visualize the .r2 rule file of a single region (n), passing 'ACAd' and n
% through to visualize_c45rule unchanged.
n = 16;
ruleFile = sprintf(['../c45_disc3_' data '_region%d.r2'], n);
visualize_c45rule(ruleFile, ['../../' origData], 'D3', 'ACAd', n)