Shell:
# Create the working directory and build the unfiltered C4.5 .names file
# for the full-gene run.
mkdir -p dt    # -p: do not fail when dt is left over from an earlier run
cd dt

geneListData=try8_gs_2200_10000_100_1100_500_500
data=try8_right_gs_2200_10000_100_1100_500_500
minArea=1
maxArea=49

# One attribute line per numbered line of the gene list:
# "<second field>_<line number>: continuous."
nl -n rn ../gene_list_$geneListData.txt | awk '{print $2 "_" $1 ": continuous."}' > c45.names.1

# Prepend the class-list header line.
# With bash's builtin echo (no -e) the trailing \n stays in the file as two
# literal characters; the per-region perl step (s/\\n//) removes it later.
# NOTE(review): the list repeats 33 and 44 and has no 32 or 42. The per-region
# perl substitution matches this exact text, so both must be changed together.
echo "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,33,33,34,35,36,37,38,39,40,41,44,43,44,45,46,47,48,49.\n" | cat - c45.names.1 > c45.names
rm -f c45.names.1

# Input for the Octave step that filters the attribute lines.
cp c45.names c45_disc3_${data}.names.1
Octave:
% Filter the C4.5 .names file by low_variance_genes, then discretize D3 and
% write the shared .data file for the full-gene run.
data = 'try8_right_gs_2200_10000_100_1100_500_500';
origData = 'try8_right_gs_2200_10000_100_1100_500_500';

% Read the unfiltered names file line by line (line terminators are kept).
namesIn = ['c45_disc3_' data '.names.1'];
f = fopen(namesIn, 'r');
if f < 0
  error('cannot open %s for reading', namesIn);
end
lines = {};
while true
  txt = fgets(f, 99999);
  if ~ischar(txt)
    % fgets returns -1 at end of file; stop here so the sentinel is never
    % stored as a line and cannot be written to the output file.
    break;
  end
  lines{end+1} = txt;
end
fclose(f);

% Presumably supplies low_variance_genes, which is used below -- TODO confirm.
load(['../data_' origData '_start2'])

% Keep only the lines whose offset index is not listed in low_variance_genes.
% NOTE(review): the _hv variant of this step tests i-1 rather than i-2; confirm
% which offset matches the number of header lines in the .names.1 file.
lines2 = {};
for i = 1:numel(lines)
  if ~ismember(i-2, low_variance_genes)
    lines2{end+1} = lines{i};
  end
end

% Write the surviving lines unchanged.
namesOut = ['c45_disc3_' data '.names'];
f = fopen(namesOut, 'w');
if f < 0
  error('cannot open %s for writing', namesOut);
end
for i = 1:numel(lines2)
  fprintf(f, '%s', lines2{i});
end
fclose(f);

% D3 and paint3 must be in the workspace after this load (source not shown
% here -- TODO confirm which .mat file provides each).
load ../paint
[D3_3_disc, D3_3_disc_middle_thresh_mins, D3_3_disc_middle_thresh_maxs] = ...
    make_3_discretization(D3);

% Same file name as before ('c45_disc3_<data>.data'), now derived from data so
% it stays in step with the shell loops that read c45_disc3_${data}.data.
make_c45_dataset(D3_3_disc, paint3, ['c45_disc3_' data '.data']);
Shell:
# Per-region .names files: copy the shared names file, collapse the class list
# to the binary "0,1." and drop the literal \n left behind by the echo that
# created the header. Both substitutions are line-local, so they run in a
# single in-place perl pass.
for (( i = $minArea; i <= $maxArea; i++ )); do
    stem=c45_disc3_${data}_region$i
    cp c45_disc3_${data}.names "$stem.names"
    # The first pattern repeats the class list exactly as the setup step wrote
    # it (including its duplicated 33 and 44); it must stay in sync with it.
    perl -pi \
        -e 's/0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,33,33,34,35,36,37,38,39,40,41,44,43,44,45,46,47,48,49./0,1./g;' \
        -e 's/\\n//;' \
        "$stem.names"
done

# Per-region .data files: relabel the last field as 1 when it equals the region
# number and 0 for any other integer. The temporary YES/NO markers keep a
# freshly written label from being re-matched by the \d+ rule.
for (( i = $minArea; i <= $maxArea; i++ )); do
    stem=c45_disc3_${data}_region$i
    cp c45_disc3_${data}.data "$stem.data"
    perl -pi \
        -e "s/, $i\$/, YES/g;" \
        -e 's/, \d+$/, NO/g;' \
        -e 's/, YES$/, 1/g;' \
        -e 's/, NO$/, 0/g;' \
        "$stem.data"
done

# Run c4.5 and c4.5rules for every region, keeping each tool's output.
# NOTE(review): the first -c argument is built with plain echo while the -F and
# second -c arguments use echo -e; whether these produce the same bytes depends
# on the shell's echo. Confirm the intended option value.
for (( i = $minArea ; i <= $maxArea; i++ )); do
    stem=c45_disc3_${data}_region$i
    echo $i
    c4.5 -f "$stem" -v 1 | tee "$stem.dt1"
    c4.5rules -f "$stem" | tee "$stem.r"
    c4.5 -c`echo "\x01"` -f "$stem" -v 1 | tee "$stem.dt12"
    c4.5rules -F`echo -e "\x01"` -c`echo -e "\x01"` -f "$stem" | tee "$stem.r2"
done
Octave:
% Hand the per-region .r2 rule files (printf-style pattern with %d for the
% region number) to visualize_c45rules, together with the data file, the paint
% files and the name of the matrix variable ('D3').
% Relies on data and origData still being set from the earlier Octave step.
visualize_c45rules( ...
    ['../data_', origData, '_start2'], ...
    '../paint.mat', ...
    '../paint_names.txt', ...
    'D3', ...
    ['c45_disc3_', data, '_region%d.r2'])
Shell:
# Create the working directory and build the unfiltered C4.5 .names file
# for the _hv run.
mkdir -p dt    # -p: do not fail when dt is left over from an earlier run
cd dt

geneListData=try8_gs_2200_10000_100_1100_500_500
data=try8_right_gs_2200_10000_100_1100_500_500_hv
minArea=1
maxArea=49

# One attribute line per numbered line of the gene list:
# "<second field>_<line number>: continuous."
nl -n rn ../gene_list_$geneListData.txt | awk '{print $2 "_" $1 ": continuous."}' > c45.names.1

# Prepend the class-list header line.
# With bash's builtin echo (no -e) the trailing \n stays in the file as two
# literal characters; the per-region perl step (s/\\n//) removes it later.
# NOTE(review): the list repeats 33 and 44 and has no 32 or 42. The per-region
# perl substitution matches this exact text, so both must be changed together.
echo "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,33,33,34,35,36,37,38,39,40,41,44,43,44,45,46,47,48,49.\n" | cat - c45.names.1 > c45.names
rm -f c45.names.1

# Input for the Octave step that filters the attribute lines.
cp c45.names c45_disc3_${data}.names.1
Octave:
% Filter the C4.5 .names file for the _hv run, dropping the attribute lines
% listed in low_variance_genes.
data = 'try8_right_gs_2200_10000_100_1100_500_500_hv';
origData = 'try8_right_gs_2200_10000_100_1100_500_500';

% Read the unfiltered names file line by line (line terminators are kept).
namesIn = ['c45_disc3_' data '.names.1'];
f = fopen(namesIn, 'r');
if f < 0
  error('cannot open %s for reading', namesIn);
end
lines = {};
while true
  txt = fgets(f, 99999);
  if ~ischar(txt)
    % fgets returns -1 at end of file; stop here so the sentinel is never
    % stored as a line and cannot be written to the output file.
    break;
  end
  lines{end+1} = txt;
end
fclose(f);

% Presumably supplies low_variance_genes, which is used below -- TODO confirm.
load(['../data_' origData '_start2'])

% Line i is tested as gene i-1, i.e. assuming the class-list header occupies
% exactly one line of the .names.1 file.
% NOTE(review): the non-_hv variant of this step tests i-2 instead; confirm
% which offset is correct.
lines2 = {};
for i = 1:numel(lines)
  if ~ismember(i-1, low_variance_genes)
    lines2{end+1} = lines{i};
  end
end

% Write the surviving lines unchanged.
namesOut = ['c45_disc3_' data '.names'];
f = fopen(namesOut, 'w');
if f < 0
  error('cannot open %s for writing', namesOut);
end
for i = 1:numel(lines2)
  fprintf(f, '%s', lines2{i});
end
fclose(f);
Shell:
# Per-region .names files: copy the shared names file, collapse the class list
# to the binary "0,1." and drop the literal \n left behind by the echo that
# created the header. Both substitutions are line-local, so they run in a
# single in-place perl pass.
for (( i = $minArea; i <= $maxArea; i++ )); do
    stem=c45_disc3_${data}_region$i
    cp c45_disc3_${data}.names "$stem.names"
    # The first pattern repeats the class list exactly as the setup step wrote
    # it (including its duplicated 33 and 44); it must stay in sync with it.
    perl -pi \
        -e 's/0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,33,33,34,35,36,37,38,39,40,41,44,43,44,45,46,47,48,49./0,1./g;' \
        -e 's/\\n//;' \
        "$stem.names"
done
Octave:
% Discretize D3_hv and write the shared C4.5 .data file for the _hv run.
% D3_hv and paint3 must be in the workspace after this load (source not shown
% here -- TODO confirm which .mat file provides each).
load ../paint

% The third output previously went to D3_3_disc_middle_thresh_maxs (no _hv),
% which overwrote the variable of the same name from the non-hv run. It is now
% named consistently with the other two outputs.
[D3_hv_3_disc, D3_hv_3_disc_middle_thresh_mins, D3_hv_3_disc_middle_thresh_maxs] = ...
    make_3_discretization(D3_hv);

make_c45_dataset(D3_hv_3_disc, paint3, 'c45_disc3_try8_right_gs_2200_10000_100_1100_500_500_hv.data');
Shell:
# Per-region .data files: relabel the last field as 1 when it equals the region
# number and 0 for any other integer. The temporary YES/NO markers keep a
# freshly written label from being re-matched by the \d+ rule. All four
# substitutions are line-local, so they run in a single in-place perl pass.
for (( i = $minArea; i <= $maxArea; i++ )); do
    stem=c45_disc3_${data}_region$i
    cp c45_disc3_${data}.data "$stem.data"
    perl -pi \
        -e "s/, $i\$/, YES/g;" \
        -e 's/, \d+$/, NO/g;' \
        -e 's/, YES$/, 1/g;' \
        -e 's/, NO$/, 0/g;' \
        "$stem.data"
done

# Run c4.5 and c4.5rules for every region, keeping each tool's output.
# NOTE(review): the first -c argument is built with plain echo while the -F and
# second -c arguments use echo -e; whether these produce the same bytes depends
# on the shell's echo. Confirm the intended option value.
for (( i = $minArea ; i <= $maxArea; i++ )); do
    stem=c45_disc3_${data}_region$i
    echo $i
    c4.5 -f "$stem" -v 1 | tee "$stem.dt1"
    c4.5rules -f "$stem" | tee "$stem.r"
    c4.5 -c`echo "\x01"` -f "$stem" -v 1 | tee "$stem.dt12"
    c4.5rules -F`echo -e "\x01"` -c`echo -e "\x01"` -f "$stem" | tee "$stem.r2"
done
Octave:
% Hand the per-region .r2 rule files of the _hv run (printf-style pattern with
% %d for the region number) to visualize_c45rules. The matrix name passed is
% 'D3', not 'D3_hv'.
% Relies on data and origData still being set from the earlier Octave step.
visualize_c45rules( ...
    ['../data_', origData, '_start2'], ...
    '../paint.mat', ...
    '../paint_names.txt', ...
    'D3', ...
    ['c45_disc3_', data, '_region%d.r2'])
Note that we pass D3, rather than D3_hv, to visualize_c45rules (and to visualize_c45rule below), because the gene numbers have already been mapped back to the D3 numbering.
% Visualize the rules of a single region (n = 16) from inside dt/ACAd.
data = 'try8_right_gs_2200_10000_100_1100_500_500_hv';
% Here origData holds the complete .mat file name, not just the middle part.
origData = 'data_try8_right_gs_2200_10000_100_1100_500_500_start2.mat';

% Work in dt/ACAd; the relative paths below are resolved from there.
cd('dt')
mkdir('ACAd')
cd('ACAd')

n = 16;
visualize_c45rule( ...
    sprintf(['../c45_disc3_', data, '_region%d.r2'], n), ...
    ['../../', origData], ...
    'D3', ...
    'ACAd', ...
    n)