import pandas as pd
import numpy as np
from IPython.core.display import display, HTML
import matplotlib.pyplot as plt
import seaborn as sns
def construct_df(filename):
    """Load one measurement table from CSV, add derived columns, and display it.

    The first row of the file is skipped and the seven columns are named
    '4mers', 'origin1'..'origin3' and 'quantity1'..'quantity3'.  The
    following columns are then appended, in this order:

    * 'normalized <col>' for each origin/quantity column: the column divided
      by its own sum.
    * '5end', '3end', 'CPD': first character, last character and characters
      1-2 (slice ``[1:3]``) of the '4mers' string.
    * 'quanti mean' / 'quanti std' and 'origin mean' / 'origin std': row-wise
      ``np.mean`` / ``np.std`` over the three normalized replicates.

    The file name is printed and the table is rendered as HTML (derived
    fractions formatted as percentages) through IPython's ``display``.

    Parameters
    ----------
    filename : str
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The loaded table including all derived columns.
    """
    origin_cols = ['origin1', 'origin2', 'origin3']
    quantity_cols = ['quantity1', 'quantity2', 'quantity3']
    replicate_cols = origin_cols + quantity_cols

    df = pd.read_csv(filename, skiprows=1, names=['4mers'] + replicate_cols)

    # Express every replicate as a fraction of its own column total.
    for col in replicate_cols:
        df['normalized ' + col] = df[col] / df[col].sum()

    # Split each 4-mer into its flanking bases and central dinucleotide.
    df['5end'] = df['4mers'].str[0]
    df['3end'] = df['4mers'].str[-1]
    df['CPD'] = df['4mers'].str[1:3]

    normalized_origin = ['normalized ' + col for col in origin_cols]
    normalized_quantity = ['normalized ' + col for col in quantity_cols]

    # Row-wise summary across the three replicates (np.std with its defaults).
    df["quanti mean"] = np.mean(df[normalized_quantity], axis=1)
    df["quanti std"] = np.std(df[normalized_quantity], axis=1)
    df["origin mean"] = np.mean(df[normalized_origin], axis=1)
    df["origin std"] = np.std(df[normalized_origin], axis=1)

    percent_cols = normalized_origin + normalized_quantity + [
        'quanti mean', 'quanti std', 'origin mean', 'origin std']
    output = df.to_html(
        formatters={col: '{:,.1%}'.format for col in percent_cols})

    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(filename)
    display(HTML(output))
    return df
def transform_df(df, PP):
    """Arrange the mean yields of one central dinucleotide as a 4x4 table.

    Rows of ``df`` whose 'CPD' column equals ``PP`` are sorted by '5end' and
    then '3end', and their 'quanti mean' values are reshaped row-major into a
    4x4 grid, so each grid row shares a 5' base and each grid column shares a
    3' base.  Row and column labels carry the base letter followed by that
    row's / column's total formatted as a whole percentage.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with at least the columns 'CPD', '5end', '3end' and
        'quanti mean'.
    PP : str
        Central dinucleotide to select, compared against the 'CPD' column.

    Returns
    -------
    pandas.DataFrame
        4x4 table; index named "5'X", columns named "3'Y".

    Raises
    ------
    ValueError
        If the selection does not contain exactly 16 rows.
    """
    # Labels are applied positionally, so this assumes the sorted flanking
    # bases come out in the order A, C, G, T.
    names = ['A', 'C', 'G', 'T']

    selected = df[df['CPD'] == PP]
    expected_rows = len(names) ** 2
    if len(selected) != expected_rows:
        raise ValueError(
            'expected %d rows with CPD == %r, found %d'
            % (expected_rows, PP, len(selected)))

    # DataFrame.sort / Series.reshape are deprecated and later removed from
    # pandas; sort_values and the underlying ndarray are the replacements.
    ordered = selected.sort_values(by=['5end', '3end'], ascending=True)
    grid = ordered['quanti mean'].values.reshape(len(names), len(names))
    pypy = pd.DataFrame(grid)

    three_ends = pypy.sum(axis=0)  # column totals: one per 3' base
    five_ends = pypy.sum(axis=1)   # row totals: one per 5' base

    pypy.columns = [name + ': {:.0%}'.format(total)
                    for name, total in zip(names, three_ends)]
    pypy.index = [name + ': {:.0%}'.format(total)
                  for name, total in zip(names, five_ends)]
    pypy.index.name = '5\'X'
    pypy.columns.name = '3\'Y'
    return pypy
def plt_CPD_HEATMAP(df, uv, filepath):
    """Draw four side-by-side heatmaps (TT, TC, CT, CC) and save them as a PDF.

    For each central dinucleotide the 4x4 table produced by ``transform_df``
    is drawn as an annotated seaborn heatmap on a shared colour scale from 0
    to 0.07.  Each panel title shows the dinucleotide and the sum of its
    table as a whole percentage; ``uv`` is used as the figure title and as
    the stem of the output file name.

    Parameters
    ----------
    df : pandas.DataFrame
        Table accepted by ``transform_df``.
    uv : str
        Figure title and file-name stem.
    filepath : str
        Prefix the file name is appended to by plain string concatenation.
    """
    max_y = 0.07  # upper bound of the colour scale shared by all panels
    CPD = ['TT', 'TC', 'CT', 'CC']
    all_data = [transform_df(df, dimer) for dimer in CPD]

    fig = plt.figure(figsize=(40, 7))
    fig.subplots_adjust(wspace=.3, top=0.85)
    fig.suptitle(uv, fontsize=30)

    for i, (dimer, data) in enumerate(zip(CPD, all_data)):
        sns.set(font_scale=2)
        # Panels occupy positions 141..144: one row, four columns.
        fig.add_subplot(141 + i)
        ax = sns.heatmap(data, vmin=0, vmax=max_y, annot=True,
                         fmt='.1%', annot_kws={"size": 30})
        total = np.sum(np.sum(data))
        ax.set_title(dimer + ' : {:.0%}'.format(total), fontsize=30)

    plt.savefig(filepath + '%s_HEATMAP.pdf' % (uv), format='pdf', dpi=300)
def plt_total(df, uv, file_path):
    """Plot the mean normalized quantity of every 4-mer as a sorted bar chart.

    Rows are ordered by 'quanti mean', largest first, and drawn as grey bars
    with red error bars taken from 'quanti std'.  The x tick labels are the
    '4mers' strings (rotated 90 degrees), the y tick labels are rewritten as
    whole percentages, and the figure is saved as a PDF.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns '4mers', 'quanti mean' and 'quanti std'.
    uv : str
        Plot title and file-name stem.
    file_path : str
        Prefix the file name is appended to by plain string concatenation.
    """
    max_y = np.max(df["quanti mean"] + 0.01)
    # DataFrame.sort is deprecated and later removed; sort_values replaces it.
    df = df.sort_values(by="quanti mean", ascending=False)

    fig = plt.figure()
    ax = fig.add_subplot(111)

    # One bar per row rather than a fixed count of 64.
    ind = np.arange(len(df))  # the x locations for the groups
    width = 0.4               # the width of the bars

    # Plain arrays keep the bars in sorted order regardless of the
    # (now shuffled) DataFrame index.
    ax.bar(ind + width, df["quanti mean"].values, width,
           color='grey',
           yerr=df["quanti std"].values,
           error_kw=dict(elinewidth=0.5, ecolor='red'))

    # Axes and labels.
    ax.set_xlim(0, len(ind) + width)
    ax.set_ylim(0, max_y)
    ax.set_ylabel('normalized relative Qty', fontsize=12)
    ax.set_title(uv, fontsize=20)

    ax.set_xticks(ind + 2 * width)
    xtickNames = ax.set_xticklabels(list(df['4mers']))
    plt.setp(xtickNames, rotation=90)

    # Relabel the existing y ticks (fractions) as whole percentages.
    vals = ax.get_yticks()
    ax.set_yticklabels(['{:.0f}%'.format(x * 100) for x in vals])

    plt.tick_params(axis='both', which='major', labelsize=7)
    plt.tick_params(axis='both', which='minor', labelsize=7)

    plt.savefig(file_path + '%s_barplot.pdf' % (uv), format='pdf', dpi=300)
%matplotlib inline
names = ["UVC","BB UVB","FiltBB UVB","NB UVB"]
for j in range(len(names)):
filepath = '/Users/Chen/Documents/research/all data/data analysis/'
filename = filepath + '%s.csv' %(names[j])
df = construct_df(filename)
#max_c_containing_cpd_yield = np.max(df[df['CPD'] != 'TT' ]['quanti mean'])
#max_c_containing_cpd = df[df['quanti mean'] == max_c_containing_cpd_yield ]['4mers']
#print filename, max_c_containing_cpd
plt_CPD_HEATMAP(df, names[j], filepath)
plt_total(df, names[j], filepath)
/Users/Chen/Documents/research/all data/data analysis/UVC.csv
4mers | origin1 | origin2 | origin3 | quantity1 | quantity2 | quantity3 | normalized origin1 | normalized origin2 | normalized origin3 | normalized quantity1 | normalized quantity2 | normalized quantity3 | 5end | 3end | CPD | quanti mean | quanti std | origin mean | origin std | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | GTTG | 98.232330 | 112.397776 | 72.249041 | 1.894604 | 1.155363 | 1.227031 | 0.6% | 0.6% | 0.7% | 1.0% | 0.6% | 0.7% | G | G | TT | 0.8% | 0.2% | 0.6% | 0.0% |
1 | ACCG | 45.454714 | 100.246222 | 71.285144 | 1.091761 | 0.894071 | 0.989051 | 0.3% | 0.6% | 0.7% | 0.6% | 0.5% | 0.6% | A | G | CC | 0.6% | 0.0% | 0.5% | 0.2% |
2 | TCTA | 325.303726 | 270.059716 | 100.550238 | 3.011755 | 2.076148 | 1.976414 | 1.9% | 1.5% | 0.9% | 1.6% | 1.2% | 1.2% | T | A | CT | 1.3% | 0.2% | 1.4% | 0.4% |
3 | CTCT | 130.798608 | 59.782772 | 106.257716 | 2.581004 | 1.833221 | 1.843921 | 0.7% | 0.3% | 1.0% | 1.4% | 1.0% | 1.1% | C | T | TC | 1.2% | 0.2% | 0.7% | 0.3% |
4 | ACTC | 125.563298 | 131.469449 | 52.947096 | 0.831735 | 0.660327 | 0.881032 | 0.7% | 0.7% | 0.5% | 0.4% | 0.4% | 0.5% | A | C | CT | 0.4% | 0.1% | 0.6% | 0.1% |
5 | GTCA | 23.187486 | 103.684129 | 22.415362 | 0.727549 | 0.589369 | 0.627018 | 0.1% | 0.6% | 0.2% | 0.4% | 0.3% | 0.4% | G | A | TC | 0.4% | 0.0% | 0.3% | 0.2% |
6 | CTCG | 236.748271 | 173.913425 | 114.360359 | 2.860291 | 2.784059 | 2.422837 | 1.3% | 1.0% | 1.1% | 1.5% | 1.6% | 1.4% | C | G | TC | 1.5% | 0.0% | 1.1% | 0.2% |
7 | TCTC | 81.263133 | 55.439150 | 51.343966 | 1.239722 | 0.371486 | 0.919851 | 0.5% | 0.3% | 0.5% | 0.7% | 0.2% | 0.5% | T | C | CT | 0.5% | 0.2% | 0.4% | 0.1% |
8 | GTCT | 23.700021 | 26.111412 | 38.491997 | 0.825606 | 0.503385 | 0.764573 | 0.1% | 0.1% | 0.4% | 0.4% | 0.3% | 0.5% | G | T | TC | 0.4% | 0.1% | 0.2% | 0.1% |
9 | TTTG | 769.420615 | 444.117118 | 329.288253 | 7.933872 | 7.128359 | 6.810273 | 4.4% | 2.4% | 3.1% | 4.3% | 4.0% | 4.0% | T | G | TT | 4.1% | 0.1% | 3.3% | 0.8% |
10 | TTTT | 305.394881 | 176.411181 | 149.667480 | 3.642997 | 2.673865 | 2.498788 | 1.7% | 1.0% | 1.4% | 2.0% | 1.5% | 1.5% | T | T | TT | 1.6% | 0.2% | 1.4% | 0.3% |
11 | ATTT | 170.885227 | 82.452349 | 79.520419 | 1.891977 | 1.652070 | 1.701302 | 1.0% | 0.5% | 0.7% | 1.0% | 0.9% | 1.0% | A | T | TT | 1.0% | 0.0% | 0.7% | 0.2% |
12 | ACTA | 185.079447 | 365.387286 | 113.119233 | 2.154630 | 1.833221 | 1.530835 | 1.1% | 2.0% | 1.1% | 1.2% | 1.0% | 0.9% | A | A | CT | 1.0% | 0.1% | 1.4% | 0.4% |
13 | ATCA | 211.335967 | 247.605688 | 131.501337 | 2.404151 | 2.107036 | 1.947721 | 1.2% | 1.4% | 1.2% | 1.3% | 1.2% | 1.2% | A | A | TC | 1.2% | 0.1% | 1.3% | 0.1% |
14 | CCTA | 408.136120 | 331.111103 | 136.783346 | 3.014381 | 2.389198 | 2.070931 | 2.3% | 1.8% | 1.3% | 1.6% | 1.3% | 1.2% | C | A | CT | 1.4% | 0.2% | 1.8% | 0.4% |
15 | CCCT | 121.031183 | 83.542187 | 73.936608 | 1.291377 | 1.125311 | 0.892846 | 0.7% | 0.5% | 0.7% | 0.7% | 0.6% | 0.5% | C | T | CC | 0.6% | 0.1% | 0.6% | 0.1% |
16 | GCCC | 43.997773 | 98.264397 | 32.171432 | 0.668890 | 0.668675 | 0.535876 | 0.3% | 0.5% | 0.3% | 0.4% | 0.4% | 0.3% | G | C | CC | 0.3% | 0.0% | 0.4% | 0.1% |
17 | CCCG | 133.905628 | 298.555297 | 73.401759 | 1.260735 | 1.057692 | 0.780607 | 0.8% | 1.6% | 0.7% | 0.7% | 0.6% | 0.5% | C | G | CC | 0.6% | 0.1% | 1.0% | 0.4% |
18 | TCCC | 110.764596 | 42.429475 | 69.186004 | 1.279996 | 1.071883 | 0.891158 | 0.6% | 0.2% | 0.6% | 0.7% | 0.6% | 0.5% | T | C | CC | 0.6% | 0.1% | 0.5% | 0.2% |
19 | GTCC | 47.862787 | 68.585267 | 35.232019 | 0.655757 | 0.675353 | 0.456550 | 0.3% | 0.4% | 0.3% | 0.4% | 0.4% | 0.3% | G | C | TC | 0.3% | 0.0% | 0.3% | 0.0% |
20 | GCCG | 36.950919 | 83.015128 | 32.329862 | 0.548945 | 0.400704 | 0.389038 | 0.2% | 0.5% | 0.3% | 0.3% | 0.2% | 0.2% | G | G | CC | 0.2% | 0.0% | 0.3% | 0.1% |
21 | ACTG | 36.922960 | 87.464840 | 54.767960 | 0.866000 | 1.893000 | 0.886000 | 0.2% | 0.5% | 0.5% | 0.5% | 1.1% | 0.5% | A | G | CT | 0.7% | 0.3% | 0.4% | 0.1% |
22 | ACCA | 125.111611 | 182.020200 | 116.074520 | 1.830000 | 2.476000 | 2.271000 | 0.7% | 1.0% | 1.1% | 1.0% | 1.4% | 1.3% | A | A | CC | 1.2% | 0.2% | 0.9% | 0.2% |
23 | ATTA | 699.430689 | 854.874310 | 462.002960 | 8.041000 | 7.779000 | 8.723000 | 4.0% | 4.7% | 4.3% | 4.3% | 4.3% | 5.2% | A | A | TT | 4.6% | 0.4% | 4.3% | 0.3% |
24 | TTTA | 1221.814837 | 1455.408670 | 732.633450 | 11.376000 | 11.790000 | 13.070000 | 6.9% | 8.0% | 6.8% | 6.1% | 6.6% | 7.8% | T | A | TT | 6.8% | 0.7% | 7.3% | 0.5% |
25 | CTTT | 753.245881 | 243.846290 | 392.499360 | 4.882000 | 4.113000 | 4.726000 | 4.3% | 1.3% | 3.7% | 2.6% | 2.3% | 2.8% | C | T | TT | 2.6% | 0.2% | 3.1% | 1.3% |
26 | GCTT | 103.137874 | 243.939680 | 63.662610 | 1.677000 | 1.548000 | 1.594000 | 0.6% | 1.3% | 0.6% | 0.9% | 0.9% | 0.9% | G | T | CT | 0.9% | 0.0% | 0.8% | 0.4% |
27 | CCTG | 47.205195 | 26.472900 | 118.930140 | 1.081000 | 1.162000 | 0.928000 | 0.3% | 0.1% | 1.1% | 0.6% | 0.6% | 0.6% | C | G | CT | 0.6% | 0.0% | 0.5% | 0.4% |
28 | TCCT | 80.279850 | 107.616230 | 141.106560 | 1.458000 | 1.586000 | 1.029000 | 0.5% | 0.6% | 1.3% | 0.8% | 0.9% | 0.6% | T | T | CC | 0.8% | 0.1% | 0.8% | 0.4% |
29 | ATCC | 185.113670 | 120.839130 | 44.421460 | 2.097000 | 2.355000 | 1.181000 | 1.1% | 0.7% | 0.4% | 1.1% | 1.3% | 0.7% | A | C | TC | 1.0% | 0.3% | 0.7% | 0.3% |
30 | CTCA | 371.911240 | 355.358180 | 237.439440 | 4.104000 | 4.015000 | 4.152000 | 2.1% | 2.0% | 2.2% | 2.2% | 2.2% | 2.5% | C | A | TC | 2.3% | 0.1% | 2.1% | 0.1% |
31 | CCTC | 116.738070 | 99.357680 | 79.144030 | 1.571000 | 1.321000 | 1.642000 | 0.7% | 0.5% | 0.7% | 0.8% | 0.7% | 1.0% | C | C | CT | 0.9% | 0.1% | 0.6% | 0.1% |
32 | GCCT | 42.213710 | 70.557650 | 31.261790 | 1.183000 | 1.228000 | 0.668000 | 0.2% | 0.4% | 0.3% | 0.6% | 0.7% | 0.4% | G | T | CC | 0.6% | 0.1% | 0.3% | 0.1% |
33 | CTTG | 316.914410 | 360.922280 | 209.467830 | 3.943000 | 3.622000 | 4.150000 | 1.8% | 2.0% | 2.0% | 2.1% | 2.0% | 2.5% | C | G | TT | 2.2% | 0.2% | 1.9% | 0.1% |
34 | CCTT | 120.244480 | 47.647340 | 69.895410 | 1.262000 | 1.437000 | 1.636000 | 0.7% | 0.3% | 0.7% | 0.7% | 0.8% | 1.0% | C | T | CT | 0.8% | 0.1% | 0.5% | 0.2% |
35 | ACCT | 97.808210 | 192.458060 | 72.412710 | 1.529000 | 1.751000 | 1.113000 | 0.6% | 1.1% | 0.7% | 0.8% | 1.0% | 0.7% | A | T | CC | 0.8% | 0.1% | 0.8% | 0.2% |
36 | TCCA | 252.335580 | 314.542770 | 144.528140 | 2.885000 | 2.928000 | 2.869000 | 1.4% | 1.7% | 1.3% | 1.5% | 1.6% | 1.7% | T | A | CC | 1.6% | 0.1% | 1.5% | 0.2% |
37 | CTCC | 286.859710 | 247.767490 | 187.988090 | 3.397000 | 3.156000 | 3.260000 | 1.6% | 1.4% | 1.8% | 1.8% | 1.8% | 1.9% | C | C | TC | 1.8% | 0.1% | 1.6% | 0.2% |
38 | GCTC | 76.071590 | 80.994960 | 51.542590 | 1.283000 | 1.299000 | 0.848000 | 0.4% | 0.4% | 0.5% | 0.7% | 0.7% | 0.5% | G | C | CT | 0.6% | 0.1% | 0.5% | 0.0% |
39 | TTCG | 376.739770 | 384.250640 | 215.279950 | 4.061000 | 3.937000 | 3.773000 | 2.1% | 2.1% | 2.0% | 2.2% | 2.2% | 2.2% | T | G | TC | 2.2% | 0.0% | 2.1% | 0.1% |
40 | CTTC | 416.146700 | 413.345800 | 255.910860 | 4.396000 | 4.330000 | 4.070000 | 2.4% | 2.3% | 2.4% | 2.4% | 2.4% | 2.4% | C | C | TT | 2.4% | 0.0% | 2.3% | 0.1% |
41 | ACTT | 141.463350 | 86.564890 | 156.516780 | 2.319000 | 1.622000 | 1.779000 | 0.8% | 0.5% | 1.5% | 1.2% | 0.9% | 1.1% | A | T | CT | 1.1% | 0.1% | 0.9% | 0.4% |
42 | TTCA | 669.675970 | 725.908680 | 317.528440 | 6.613000 | 6.604000 | 5.956000 | 3.8% | 4.0% | 3.0% | 3.5% | 3.7% | 3.5% | T | A | TC | 3.6% | 0.1% | 3.6% | 0.4% |
43 | TTTC | 671.141170 | 577.954420 | 419.976290 | 7.005000 | 6.669000 | 6.053000 | 3.8% | 3.2% | 3.9% | 3.8% | 3.7% | 3.6% | T | C | TT | 3.7% | 0.1% | 3.6% | 0.3% |
44 | GTTT | 208.506950 | 214.975070 | 147.349120 | 2.354000 | 2.173000 | 2.952000 | 1.2% | 1.2% | 1.4% | 1.3% | 1.2% | 1.8% | G | T | TT | 1.4% | 0.2% | 1.2% | 0.1% |
45 | ATTG | 600.380484 | 561.823916 | 344.453215 | 6.442953 | 7.160215 | 5.833914 | 3.4% | 3.1% | 3.2% | 3.5% | 4.0% | 3.5% | A | G | TT | 3.6% | 0.2% | 3.2% | 0.1% |
46 | CCCA | 336.107133 | 260.667568 | 189.392034 | 3.202855 | 3.361277 | 2.707637 | 1.9% | 1.4% | 1.8% | 1.7% | 1.9% | 1.6% | C | A | CC | 1.7% | 0.1% | 1.7% | 0.2% |
47 | CCCC | 80.480019 | 96.278561 | 55.080170 | 1.458497 | 1.645912 | 1.436801 | 0.5% | 0.5% | 0.5% | 0.8% | 0.9% | 0.9% | C | C | CC | 0.9% | 0.1% | 0.5% | 0.0% |
48 | ACCC | 114.872718 | 53.642936 | 73.784829 | 1.611493 | 1.622762 | 1.353819 | 0.7% | 0.3% | 0.7% | 0.9% | 0.9% | 0.8% | A | C | CC | 0.9% | 0.0% | 0.5% | 0.2% |
49 | CTTA | 1043.576232 | 1110.221141 | 341.847614 | 9.767601 | 9.723340 | 9.169976 | 5.9% | 6.1% | 3.2% | 5.2% | 5.4% | 5.4% | C | A | TT | 5.4% | 0.1% | 5.1% | 1.3% |
50 | TCTT | 227.532688 | 210.764274 | 585.948346 | 2.724742 | 2.788019 | 2.319766 | 1.3% | 1.2% | 5.5% | 1.5% | 1.6% | 1.4% | T | T | CT | 1.5% | 0.1% | 2.6% | 2.0% |
51 | ATCT | 230.452602 | 251.337548 | 82.279615 | 2.359363 | 2.428630 | 2.080144 | 1.3% | 1.4% | 0.8% | 1.3% | 1.4% | 1.2% | A | T | TC | 1.3% | 0.0% | 1.2% | 0.3% |
52 | GCCA | 99.627768 | 117.189014 | 80.787207 | 1.368914 | 1.591894 | 1.215826 | 0.6% | 0.6% | 0.8% | 0.7% | 0.9% | 0.7% | G | A | CC | 0.8% | 0.1% | 0.7% | 0.1% |
53 | TCTG | 111.309228 | 230.847188 | 125.666229 | 1.137406 | 1.545592 | 1.574793 | 0.6% | 1.3% | 1.2% | 0.6% | 0.9% | 0.9% | T | G | CT | 0.8% | 0.1% | 1.0% | 0.3% |
54 | TTCT | 442.727221 | 605.803031 | 340.806834 | 3.874227 | 4.205730 | 4.303875 | 2.5% | 3.3% | 3.2% | 2.1% | 2.3% | 2.6% | T | T | TC | 2.3% | 0.2% | 3.0% | 0.4% |
55 | ATTC | 617.022651 | 716.644004 | 382.722673 | 5.809830 | 5.509893 | 5.052578 | 3.5% | 3.9% | 3.6% | 3.1% | 3.1% | 3.0% | A | C | TT | 3.1% | 0.0% | 3.7% | 0.2% |
56 | GCTA | 180.445337 | 265.994364 | 134.113870 | 1.908427 | 1.907186 | 1.661505 | 1.0% | 1.5% | 1.3% | 1.0% | 1.1% | 1.0% | G | A | CT | 1.0% | 0.0% | 1.2% | 0.2% |
57 | ATCG | 238.753727 | 293.573227 | 154.866789 | 2.424789 | 2.258858 | 2.164058 | 1.4% | 1.6% | 1.4% | 1.3% | 1.3% | 1.3% | A | G | TC | 1.3% | 0.0% | 1.5% | 0.1% |
58 | GTTA | 624.090048 | 742.415526 | 329.428406 | 5.548126 | 5.013804 | 4.566807 | 3.5% | 4.1% | 3.1% | 3.0% | 2.8% | 2.7% | G | A | TT | 2.8% | 0.1% | 3.6% | 0.4% |
59 | TCCG | 336.147336 | 460.805400 | 216.345450 | 2.847541 | 3.446163 | 2.783160 | 1.9% | 2.5% | 2.0% | 1.5% | 1.9% | 1.7% | T | G | CC | 1.7% | 0.2% | 2.2% | 0.3% |
60 | TTCC | 638.424152 | 704.685437 | 351.999854 | 5.583355 | 5.523122 | 4.622749 | 3.6% | 3.9% | 3.3% | 3.0% | 3.1% | 2.7% | T | C | TC | 2.9% | 0.1% | 3.6% | 0.2% |
61 | GTTC | 389.750931 | 427.242910 | 205.690160 | 3.450467 | 3.355765 | 2.754256 | 2.2% | 2.3% | 1.9% | 1.9% | 1.9% | 1.6% | G | C | TT | 1.8% | 0.1% | 2.2% | 0.2% |
62 | GTCG | 105.191819 | 105.852666 | 50.856442 | 0.878722 | 0.892960 | 0.680639 | 0.6% | 0.6% | 0.5% | 0.5% | 0.5% | 0.4% | G | G | TC | 0.5% | 0.0% | 0.6% | 0.1% |
63 | GCTG | 114.301470 | 152.993135 | 72.632126 | 0.900866 | 1.155336 | 0.586468 | 0.7% | 0.8% | 0.7% | 0.5% | 0.6% | 0.3% | G | G | CT | 0.5% | 0.1% | 0.7% | 0.1% |
/Users/Chen/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:3: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....) app.launch_new_instance() /Users/Chen/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:3: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) instead app.launch_new_instance()
/Users/Chen/Documents/research/all data/data analysis/BB UVB.csv
4mers | origin1 | origin2 | origin3 | quantity1 | quantity2 | quantity3 | normalized origin1 | normalized origin2 | normalized origin3 | normalized quantity1 | normalized quantity2 | normalized quantity3 | 5end | 3end | CPD | quanti mean | quanti std | origin mean | origin std | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | GTTG | 73.026749 | 33.155274 | 58.152609 | 1.184548 | 0.821710 | 1.056279 | 0.6% | 0.4% | 0.9% | 0.7% | 0.5% | 0.6% | G | G | TT | 0.6% | 0.1% | 0.6% | 0.2% |
1 | ACCG | 165.392205 | 70.640132 | 103.462375 | 1.936628 | 1.735711 | 2.456113 | 1.3% | 0.8% | 1.6% | 1.1% | 1.0% | 1.5% | A | G | CC | 1.2% | 0.2% | 1.2% | 0.3% |
2 | TCTA | 343.394311 | 100.765022 | 101.036230 | 3.054364 | 2.323456 | 1.350377 | 2.7% | 1.1% | 1.6% | 1.7% | 1.4% | 0.8% | T | A | CT | 1.3% | 0.4% | 1.8% | 0.6% |
3 | CTCT | 125.986767 | 135.625061 | 154.791853 | 3.416410 | 2.686142 | 3.301313 | 1.0% | 1.5% | 2.4% | 1.9% | 1.6% | 2.0% | C | T | TC | 1.8% | 0.2% | 1.6% | 0.6% |
4 | ACTC | 157.423875 | 47.301912 | 35.394586 | 1.236011 | 1.007101 | 1.037732 | 1.2% | 0.5% | 0.5% | 0.7% | 0.6% | 0.6% | A | C | CT | 0.6% | 0.0% | 0.8% | 0.3% |
5 | GTCA | 26.752503 | 27.307255 | 52.788823 | 0.918205 | 1.518747 | 0.894658 | 0.2% | 0.3% | 0.8% | 0.5% | 0.9% | 0.5% | G | A | TC | 0.6% | 0.2% | 0.4% | 0.3% |
6 | CTCG | 236.463240 | 115.736288 | 153.147165 | 3.518433 | 2.820530 | 3.343706 | 1.8% | 1.3% | 2.4% | 2.0% | 1.6% | 2.0% | C | G | TC | 1.9% | 0.2% | 1.8% | 0.4% |
7 | TCTC | 94.599622 | 48.625199 | 66.595162 | 1.692856 | 0.886475 | 1.215251 | 0.7% | 0.6% | 1.0% | 0.9% | 0.5% | 0.7% | T | C | CT | 0.7% | 0.2% | 0.8% | 0.2% |
8 | GTCT | 43.821355 | 36.885586 | 37.856281 | 0.936262 | 0.981195 | 1.171092 | 0.3% | 0.4% | 0.6% | 0.5% | 0.6% | 0.7% | G | T | TC | 0.6% | 0.1% | 0.4% | 0.1% |
9 | TTTG | 450.247040 | 202.043801 | 200.167979 | 5.985037 | 4.780491 | 5.221339 | 3.5% | 2.3% | 3.1% | 3.4% | 2.8% | 3.1% | T | G | TT | 3.1% | 0.2% | 2.9% | 0.5% |
10 | TTTT | 229.144686 | 124.560535 | 109.285509 | 3.150970 | 2.429509 | 2.576225 | 1.8% | 1.4% | 1.7% | 1.8% | 1.4% | 1.5% | T | T | TT | 1.6% | 0.1% | 1.6% | 0.2% |
11 | ATTT | 96.372474 | 60.677181 | 53.805227 | 1.468045 | 1.322022 | 1.221433 | 0.7% | 0.7% | 0.8% | 0.8% | 0.8% | 0.7% | A | T | TT | 0.8% | 0.0% | 0.8% | 0.1% |
12 | ACTA | 151.065352 | 119.759067 | 77.456923 | 2.119005 | 1.907339 | 1.868801 | 1.2% | 1.4% | 1.2% | 1.2% | 1.1% | 1.1% | A | A | CT | 1.1% | 0.0% | 1.2% | 0.1% |
13 | ATCA | 198.162669 | 145.677284 | 100.156801 | 2.631827 | 2.350982 | 2.468478 | 1.5% | 1.7% | 1.5% | 1.5% | 1.4% | 1.5% | A | A | TC | 1.4% | 0.0% | 1.6% | 0.1% |
14 | CCTA | 238.090920 | 137.678062 | 100.070422 | 2.713987 | 2.499132 | 2.595655 | 1.8% | 1.6% | 1.5% | 1.5% | 1.5% | 1.5% | C | A | CT | 1.5% | 0.0% | 1.6% | 0.1% |
15 | CCCT | 201.064115 | 135.115213 | 97.705654 | 2.575850 | 2.124303 | 2.361614 | 1.6% | 1.5% | 1.5% | 1.4% | 1.2% | 1.4% | C | T | CC | 1.4% | 0.1% | 1.5% | 0.0% |
16 | GCCC | 96.688142 | 62.731736 | 44.336377 | 1.166491 | 1.078343 | 0.970611 | 0.7% | 0.7% | 0.7% | 0.7% | 0.6% | 0.6% | G | C | CC | 0.6% | 0.0% | 0.7% | 0.0% |
17 | CCCG | 178.029268 | 103.875318 | 74.346154 | 1.758765 | 1.742997 | 1.751339 | 1.4% | 1.2% | 1.1% | 1.0% | 1.0% | 1.0% | C | G | CC | 1.0% | 0.0% | 1.2% | 0.1% |
18 | TCCC | 154.220551 | 107.304977 | 78.592311 | 2.092822 | 1.720329 | 1.853787 | 1.2% | 1.2% | 1.2% | 1.2% | 1.0% | 1.1% | T | C | CC | 1.1% | 0.1% | 1.2% | 0.0% |
19 | GTCC | 62.475314 | 47.977591 | 33.272441 | 0.805348 | 0.705133 | 0.673864 | 0.5% | 0.5% | 0.5% | 0.5% | 0.4% | 0.4% | G | C | TC | 0.4% | 0.0% | 0.5% | 0.0% |
20 | GCCG | 77.090152 | 60.490186 | 40.176155 | 0.947097 | 0.880809 | 0.877877 | 0.6% | 0.7% | 0.6% | 0.5% | 0.5% | 0.5% | G | G | CC | 0.5% | 0.0% | 0.6% | 0.0% |
21 | ACTG | 59.390330 | 67.964500 | 47.340260 | 1.567000 | 1.583000 | 1.599000 | 0.5% | 0.8% | 0.7% | 0.9% | 0.9% | 0.9% | A | G | CT | 0.9% | 0.0% | 0.7% | 0.1% |
22 | ACCA | 177.123425 | 107.700280 | 87.820120 | 2.708000 | 2.371000 | 2.789000 | 1.4% | 1.2% | 1.3% | 1.5% | 1.4% | 1.7% | A | A | CC | 1.5% | 0.1% | 1.3% | 0.1% |
23 | ATTA | 336.049589 | 217.898770 | 191.469090 | 5.661000 | 5.162000 | 5.767000 | 2.6% | 2.5% | 2.9% | 3.2% | 3.0% | 3.4% | A | A | TT | 3.2% | 0.2% | 2.7% | 0.2% |
24 | TTTA | 794.706618 | 436.563370 | 314.749870 | 8.982000 | 8.730000 | 9.265000 | 6.1% | 4.9% | 4.8% | 5.0% | 5.1% | 5.5% | T | A | TT | 5.2% | 0.2% | 5.3% | 0.6% |
25 | CTTT | 465.865024 | 282.249090 | 209.621060 | 4.848000 | 3.422000 | 4.462000 | 3.6% | 3.2% | 3.2% | 2.7% | 2.0% | 2.6% | C | T | TT | 2.5% | 0.3% | 3.3% | 0.2% |
26 | GCTT | 114.430543 | 77.789540 | 46.080370 | 1.945000 | 2.180000 | 2.186000 | 0.9% | 0.9% | 0.7% | 1.1% | 1.3% | 1.3% | G | T | CT | 1.2% | 0.1% | 0.8% | 0.1% |
27 | CCTG | 66.046202 | 59.113690 | 122.389940 | 1.236000 | 1.219000 | 1.989000 | 0.5% | 0.7% | 1.9% | 0.7% | 0.7% | 1.2% | C | G | CT | 0.9% | 0.2% | 1.0% | 0.6% |
28 | TCCT | 194.864860 | 151.447000 | 104.958270 | 3.474000 | 2.821000 | 2.545000 | 1.5% | 1.7% | 1.6% | 1.9% | 1.6% | 1.5% | T | T | CC | 1.7% | 0.2% | 1.6% | 0.1% |
29 | ATCC | 205.230440 | 146.316040 | 26.656200 | 2.724000 | 2.611000 | 2.950000 | 1.6% | 1.7% | 0.4% | 1.5% | 1.5% | 1.7% | A | C | TC | 1.6% | 0.1% | 1.2% | 0.6% |
30 | CTCA | 361.790840 | 232.218830 | 171.564470 | 5.097000 | 4.760000 | 5.498000 | 2.8% | 2.6% | 2.6% | 2.9% | 2.8% | 3.3% | C | A | TC | 3.0% | 0.2% | 2.7% | 0.1% |
31 | CCTC | 101.918590 | 83.840130 | 53.011630 | 1.641000 | 1.456000 | 1.651000 | 0.8% | 0.9% | 0.8% | 0.9% | 0.9% | 1.0% | C | C | CT | 0.9% | 0.1% | 0.9% | 0.1% |
32 | GCCT | 95.400500 | 81.367360 | 49.065760 | 1.948000 | 2.124000 | 1.361000 | 0.7% | 0.9% | 0.8% | 1.1% | 1.2% | 0.8% | G | T | CC | 1.0% | 0.2% | 0.8% | 0.1% |
33 | CTTG | 191.976840 | 138.672110 | 98.956450 | 3.174000 | 2.999000 | 3.056000 | 1.5% | 1.6% | 1.5% | 1.8% | 1.8% | 1.8% | C | G | TT | 1.8% | 0.0% | 1.5% | 0.0% |
34 | CCTT | 122.905910 | 87.938510 | 55.555710 | 1.697000 | 1.412000 | 1.634000 | 0.9% | 1.0% | 0.9% | 1.0% | 0.8% | 1.0% | C | T | CT | 0.9% | 0.1% | 0.9% | 0.1% |
35 | ACCT | 186.382510 | 151.666680 | 96.173690 | 3.051000 | 3.197000 | 2.938000 | 1.4% | 1.7% | 1.5% | 1.7% | 1.9% | 1.7% | A | T | CC | 1.8% | 0.1% | 1.5% | 0.1% |
36 | TCCA | 269.919570 | 175.992920 | 116.648620 | 3.880000 | 3.910000 | 3.589000 | 2.1% | 2.0% | 1.8% | 2.2% | 2.3% | 2.1% | T | A | CC | 2.2% | 0.1% | 2.0% | 0.1% |
37 | CTCC | 275.130220 | 189.738970 | 138.358370 | 3.878000 | 3.627000 | 3.477000 | 2.1% | 2.1% | 2.1% | 2.2% | 2.1% | 2.1% | C | C | TC | 2.1% | 0.0% | 2.1% | 0.0% |
38 | GCTC | 72.491180 | 67.990160 | 33.839230 | 1.530000 | 1.648000 | 1.106000 | 0.6% | 0.8% | 0.5% | 0.9% | 1.0% | 0.7% | G | C | CT | 0.8% | 0.1% | 0.6% | 0.1% |
39 | TTCG | 257.791400 | 227.278840 | 148.817130 | 4.630000 | 4.608000 | 4.313000 | 2.0% | 2.6% | 2.3% | 2.6% | 2.7% | 2.6% | T | G | TC | 2.6% | 0.1% | 2.3% | 0.2% |
40 | CTTC | 156.862000 | 170.879580 | 130.168260 | 3.327000 | 3.130000 | 2.838000 | 1.2% | 1.9% | 2.0% | 1.9% | 1.8% | 1.7% | C | C | TT | 1.8% | 0.1% | 1.7% | 0.4% |
41 | ACTT | 75.240950 | 133.540380 | 83.580440 | 2.398000 | 2.656000 | 2.331000 | 0.6% | 1.5% | 1.3% | 1.3% | 1.6% | 1.4% | A | T | CT | 1.4% | 0.1% | 1.1% | 0.4% |
42 | TTCA | 493.625720 | 331.527890 | 190.403650 | 7.108000 | 7.082000 | 6.478000 | 3.8% | 3.8% | 2.9% | 4.0% | 4.1% | 3.8% | T | A | TC | 4.0% | 0.1% | 3.5% | 0.4% |
43 | TTTC | 378.404450 | 259.597500 | 182.531700 | 4.965000 | 4.508000 | 4.391000 | 2.9% | 2.9% | 2.8% | 2.8% | 2.6% | 2.6% | T | C | TT | 2.7% | 0.1% | 2.9% | 0.1% |
44 | GTTT | 98.352020 | 99.378990 | 92.050770 | 1.853000 | 2.038000 | 2.700000 | 0.8% | 1.1% | 1.4% | 1.0% | 1.2% | 1.6% | G | T | TT | 1.3% | 0.2% | 1.1% | 0.3% |
45 | ATTG | 255.960948 | 193.344884 | 122.887681 | 3.624079 | 3.585279 | 3.906897 | 2.0% | 2.2% | 1.9% | 2.0% | 2.1% | 2.3% | A | G | TT | 2.1% | 0.1% | 2.0% | 0.1% |
46 | CCCA | 308.709258 | 198.839728 | 121.487361 | 3.700739 | 3.979266 | 3.254357 | 2.4% | 2.3% | 1.9% | 2.1% | 2.3% | 1.9% | C | A | CC | 2.1% | 0.2% | 2.2% | 0.2% |
47 | CCCC | 72.916029 | 114.635123 | 69.744088 | 1.758967 | 1.576932 | 1.304246 | 0.6% | 1.3% | 1.1% | 1.0% | 0.9% | 0.8% | C | C | CC | 0.9% | 0.1% | 1.0% | 0.3% |
48 | ACCC | 156.059323 | 105.223264 | 68.401488 | 1.861742 | 2.632816 | 1.658888 | 1.2% | 1.2% | 1.1% | 1.0% | 1.5% | 1.0% | A | C | CC | 1.2% | 0.2% | 1.1% | 0.1% |
49 | CTTA | 446.280186 | 306.304683 | 207.098117 | 5.723383 | 5.774860 | 5.407239 | 3.4% | 3.5% | 3.2% | 3.2% | 3.4% | 3.2% | C | A | TT | 3.3% | 0.1% | 3.4% | 0.1% |
50 | TCTT | 186.426749 | 129.225876 | 100.920556 | 2.223139 | 2.026077 | 2.197943 | 1.4% | 1.5% | 1.6% | 1.2% | 1.2% | 1.3% | T | T | CT | 1.2% | 0.0% | 1.5% | 0.0% |
51 | ATCT | 208.950013 | 158.456423 | 107.726667 | 2.521355 | 2.919441 | 2.537564 | 1.6% | 1.8% | 1.7% | 1.4% | 1.7% | 1.5% | A | T | TC | 1.5% | 0.1% | 1.7% | 0.1% |
52 | GCCA | 120.631189 | 106.233523 | 71.078207 | 1.740434 | 2.317627 | 1.739829 | 0.9% | 1.2% | 1.1% | 1.0% | 1.4% | 1.0% | G | A | CC | 1.1% | 0.2% | 1.1% | 0.1% |
53 | TCTG | 164.138137 | 99.182893 | 74.275016 | 1.097670 | 1.689218 | 1.389360 | 1.3% | 1.1% | 1.1% | 0.6% | 1.0% | 0.8% | T | G | CT | 0.8% | 0.2% | 1.2% | 0.1% |
54 | TTCT | 267.939215 | 245.021097 | 179.291777 | 3.799302 | 4.287560 | 4.382534 | 2.1% | 2.8% | 2.8% | 2.1% | 2.5% | 2.6% | T | T | TC | 2.4% | 0.2% | 2.5% | 0.3% |
55 | ATTC | 256.610328 | 179.725133 | 122.310228 | 3.045338 | 3.157803 | 2.925584 | 2.0% | 2.0% | 1.9% | 1.7% | 1.8% | 1.7% | A | C | TT | 1.8% | 0.1% | 2.0% | 0.1% |
56 | GCTA | 112.231723 | 103.360704 | 61.914428 | 1.480970 | 1.877347 | 1.473640 | 0.9% | 1.2% | 1.0% | 0.8% | 1.1% | 0.9% | G | A | CT | 0.9% | 0.1% | 1.0% | 0.1% |
57 | ATCG | 167.693233 | 125.284581 | 83.663979 | 2.203764 | 2.250649 | 2.061093 | 1.3% | 1.4% | 1.3% | 1.2% | 1.3% | 1.2% | A | G | TC | 1.3% | 0.0% | 1.3% | 0.1% |
58 | GTTA | 242.894870 | 167.730380 | 103.223732 | 2.788401 | 3.021878 | 2.520875 | 1.9% | 1.9% | 1.6% | 1.6% | 1.8% | 1.5% | G | A | TT | 1.6% | 0.1% | 1.8% | 0.1% |
59 | TCCG | 279.017488 | 168.366399 | 121.516611 | 3.244991 | 3.222811 | 3.084964 | 2.2% | 1.9% | 1.9% | 1.8% | 1.9% | 1.8% | T | G | CC | 1.8% | 0.0% | 2.0% | 0.1% |
60 | TTCC | 444.816494 | 258.385027 | 179.879294 | 5.262581 | 4.579110 | 4.404230 | 3.4% | 2.9% | 2.8% | 2.9% | 2.7% | 2.6% | T | C | TC | 2.7% | 0.1% | 3.0% | 0.3% |
61 | GTTC | 173.606280 | 115.841376 | 71.660205 | 2.063922 | 2.115709 | 1.814095 | 1.3% | 1.3% | 1.1% | 1.2% | 1.2% | 1.1% | G | C | TT | 1.2% | 0.1% | 1.3% | 0.1% |
62 | GTCG | 64.914408 | 62.017413 | 34.010131 | 0.824727 | 0.937688 | 0.786052 | 0.5% | 0.7% | 0.5% | 0.5% | 0.5% | 0.5% | G | G | TC | 0.5% | 0.0% | 0.6% | 0.1% |
63 | GCTG | 73.459724 | 85.678981 | 41.132565 | 0.836520 | 1.487300 | 0.935419 | 0.6% | 1.0% | 0.6% | 0.5% | 0.9% | 0.6% | G | G | CT | 0.6% | 0.2% | 0.7% | 0.2% |
/Users/Chen/Documents/research/all data/data analysis/FiltBB UVB.csv
4mers | origin1 | origin2 | origin3 | quantity1 | quantity2 | quantity3 | normalized origin1 | normalized origin2 | normalized origin3 | normalized quantity1 | normalized quantity2 | normalized quantity3 | 5end | 3end | CPD | quanti mean | quanti std | origin mean | origin std | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | GTTG | 40.486650 | 46.032510 | 32.584272 | 1.782591 | 0.945147 | 1.142673 | 0.5% | 0.6% | 0.8% | 1.0% | 0.6% | 0.8% | G | G | TT | 0.8% | 0.2% | 0.6% | 0.1% |
1 | ACCG | 135.603224 | 98.185606 | 68.417689 | 2.910705 | 2.140171 | 2.462246 | 1.7% | 1.2% | 1.7% | 1.6% | 1.3% | 1.6% | A | G | CC | 1.5% | 0.1% | 1.6% | 0.2% |
2 | TCTA | 247.800247 | 114.853924 | 60.224766 | 3.945167 | 2.677625 | 1.428581 | 3.2% | 1.5% | 1.5% | 2.2% | 1.6% | 0.9% | T | A | CT | 1.6% | 0.5% | 2.1% | 0.8% |
3 | CTCT | 82.591886 | 155.427762 | 101.462350 | 3.897804 | 2.534713 | 4.315385 | 1.1% | 2.0% | 2.6% | 2.2% | 1.6% | 2.9% | C | T | TC | 2.2% | 0.5% | 1.9% | 0.6% |
4 | ACTC | 96.634133 | 22.601075 | 20.076195 | 2.034479 | 1.551865 | 1.025059 | 1.2% | 0.3% | 0.5% | 1.1% | 1.0% | 0.7% | A | C | CT | 0.9% | 0.2% | 0.7% | 0.4% |
5 | GTCA | 22.185956 | 50.860292 | 30.254991 | 1.414447 | 0.899556 | 1.134067 | 0.3% | 0.6% | 0.8% | 0.8% | 0.6% | 0.8% | G | A | TC | 0.7% | 0.1% | 0.6% | 0.2% |
6 | CTCG | 149.465237 | 154.517188 | 94.697113 | 4.112016 | 2.746889 | 3.082827 | 1.9% | 2.0% | 2.4% | 2.3% | 1.7% | 2.0% | C | G | TC | 2.0% | 0.2% | 2.1% | 0.2% |
7 | TCTC | 73.827738 | 78.307796 | 43.240351 | 1.902077 | 1.488738 | 1.322441 | 1.0% | 1.0% | 1.1% | 1.1% | 0.9% | 0.9% | T | C | CT | 0.9% | 0.1% | 1.0% | 0.1% |
8 | GTCT | 50.454380 | 34.525172 | 18.180339 | 1.546850 | 1.088059 | 1.504121 | 0.6% | 0.4% | 0.5% | 0.9% | 0.7% | 1.0% | G | T | TC | 0.8% | 0.1% | 0.5% | 0.1% |
9 | TTTG | 176.319745 | 167.970120 | 93.175590 | 4.679302 | 3.471969 | 3.476787 | 2.3% | 2.1% | 2.4% | 2.6% | 2.1% | 2.3% | T | G | TT | 2.3% | 0.2% | 2.3% | 0.1% |
10 | TTTT | 94.271823 | 87.085689 | 44.714226 | 2.348801 | 1.891171 | 1.513684 | 1.2% | 1.1% | 1.1% | 1.3% | 1.2% | 1.0% | T | T | TT | 1.2% | 0.1% | 1.2% | 0.0% |
11 | ATTT | 37.814074 | 33.820911 | 21.322729 | 1.221764 | 0.922351 | 1.200046 | 0.5% | 0.4% | 0.5% | 0.7% | 0.6% | 0.8% | A | T | TT | 0.7% | 0.1% | 0.5% | 0.0% |
12 | ACTA | 109.485951 | 91.508637 | 44.829834 | 2.378941 | 1.927118 | 1.659984 | 1.4% | 1.2% | 1.1% | 1.3% | 1.2% | 1.1% | A | A | CT | 1.2% | 0.1% | 1.2% | 0.1% |
13 | ATCA | 145.496529 | 117.424420 | 62.982366 | 3.053872 | 2.390047 | 2.336026 | 1.9% | 1.5% | 1.6% | 1.7% | 1.5% | 1.5% | A | A | TC | 1.6% | 0.1% | 1.7% | 0.2% |
14 | CCTA | 172.165080 | 113.236731 | 60.907761 | 3.556571 | 2.375142 | 2.211718 | 2.2% | 1.4% | 1.6% | 2.0% | 1.5% | 1.5% | C | A | CT | 1.6% | 0.2% | 1.7% | 0.3% |
15 | CCCT | 126.076499 | 119.001418 | 62.704516 | 2.866570 | 2.305002 | 2.118966 | 1.6% | 1.5% | 1.6% | 1.6% | 1.4% | 1.4% | C | T | CC | 1.5% | 0.1% | 1.6% | 0.1% |
16 | GCCC | 84.471114 | 63.287867 | 35.610567 | 1.669565 | 1.314263 | 1.461092 | 1.1% | 0.8% | 0.9% | 0.9% | 0.8% | 1.0% | G | C | CC | 0.9% | 0.1% | 0.9% | 0.1% |
17 | CCCG | 157.902161 | 88.409187 | 51.273321 | 1.995727 | 1.601840 | 1.554801 | 2.0% | 1.1% | 1.3% | 1.1% | 1.0% | 1.0% | C | G | CC | 1.0% | 0.1% | 1.5% | 0.4% |
18 | TCCC | 89.635525 | 102.306389 | 57.462346 | 2.516726 | 1.856977 | 1.936329 | 1.2% | 1.3% | 1.5% | 1.4% | 1.1% | 1.3% | T | C | CC | 1.3% | 0.1% | 1.3% | 0.1% |
19 | GTCC | 58.430762 | 50.757149 | 28.890274 | 1.175477 | 0.915337 | 0.737239 | 0.8% | 0.6% | 0.7% | 0.7% | 0.6% | 0.5% | G | C | TC | 0.6% | 0.1% | 0.7% | 0.0% |
20 | GCCG | 66.122491 | 62.323126 | 35.135144 | 1.317567 | 1.077538 | 1.013585 | 0.9% | 0.8% | 0.9% | 0.7% | 0.7% | 0.7% | G | G | CC | 0.7% | 0.0% | 0.8% | 0.0% |
21 | ACTG | 77.904190 | 91.967370 | 47.553060 | 2.078000 | 1.948000 | 1.501000 | 1.0% | 1.2% | 1.2% | 1.2% | 1.2% | 1.0% | A | G | CT | 1.1% | 0.1% | 1.1% | 0.1% |
22 | ACCA | 138.199990 | 121.139990 | 71.782470 | 3.758000 | 3.768000 | 3.206000 | 1.8% | 1.5% | 1.8% | 2.1% | 2.3% | 2.1% | A | A | CC | 2.2% | 0.1% | 1.7% | 0.1% |
23 | ATTA | 263.179710 | 168.197090 | 112.168170 | 5.900000 | 3.391000 | 4.858000 | 3.4% | 2.1% | 2.9% | 3.3% | 2.1% | 3.2% | A | A | TT | 2.9% | 0.5% | 2.8% | 0.5% |
24 | TTTA | 388.483500 | 273.625850 | 162.425900 | 7.360000 | 6.383000 | 6.530000 | 5.0% | 3.5% | 4.1% | 4.1% | 3.9% | 4.3% | T | A | TT | 4.1% | 0.2% | 4.2% | 0.6% |
25 | CTTT | 141.914850 | 191.620670 | 119.810730 | 3.917000 | 3.455000 | 4.444000 | 1.8% | 2.4% | 3.1% | 2.2% | 2.1% | 2.9% | C | T | TT | 2.4% | 0.4% | 2.4% | 0.5% |
26 | GCTT | 94.147610 | 66.013230 | 31.302250 | 2.757000 | 1.716000 | 2.024000 | 1.2% | 0.8% | 0.8% | 1.5% | 1.1% | 1.3% | G | T | CT | 1.3% | 0.2% | 0.9% | 0.2% |
27 | CCTG | 117.182540 | 57.572690 | 82.292360 | 1.479000 | 1.437000 | 0.944000 | 1.5% | 0.7% | 2.1% | 0.8% | 0.9% | 0.6% | C | G | CT | 0.8% | 0.1% | 1.4% | 0.6% |
28 | TCCT | 71.653850 | 135.221120 | 78.493070 | 3.560000 | 2.767000 | 3.026000 | 0.9% | 1.7% | 2.0% | 2.0% | 1.7% | 2.0% | T | T | CC | 1.9% | 0.1% | 1.5% | 0.5% |
29 | ATCC | 203.098830 | 142.238090 | 21.254700 | 2.971000 | 2.933000 | 1.123000 | 2.6% | 1.8% | 0.5% | 1.6% | 1.8% | 0.7% | A | C | TC | 1.4% | 0.5% | 1.7% | 0.9% |
30 | CTCA | 247.256040 | 202.549630 | 112.960840 | 5.106000 | 4.642000 | 4.940000 | 3.2% | 2.6% | 2.9% | 2.8% | 2.9% | 3.3% | C | A | TC | 3.0% | 0.2% | 2.9% | 0.3% |
31 | CCTC | 43.887530 | 74.235420 | 39.680110 | 1.900000 | 1.654000 | 2.011000 | 0.6% | 0.9% | 1.0% | 1.1% | 1.0% | 1.3% | C | C | CT | 1.1% | 0.1% | 0.8% | 0.2% |
32 | GCCT | 110.450780 | 76.867300 | 29.919360 | 1.682000 | 1.822000 | 1.547000 | 1.4% | 1.0% | 0.8% | 0.9% | 1.1% | 1.0% | G | T | CC | 1.0% | 0.1% | 1.1% | 0.3% |
33 | CTTG | 111.853020 | 85.973460 | 50.354140 | 2.380000 | 2.186000 | 2.454000 | 1.4% | 1.1% | 1.3% | 1.3% | 1.3% | 1.6% | C | G | TT | 1.4% | 0.1% | 1.3% | 0.1% |
34 | CCTT | 53.477080 | 72.223530 | 39.745190 | 1.726000 | 1.492000 | 2.209000 | 0.7% | 0.9% | 1.0% | 1.0% | 0.9% | 1.5% | C | T | CT | 1.1% | 0.2% | 0.9% | 0.1% |
35 | ACCT | 180.304380 | 133.248400 | 62.900210 | 3.341000 | 3.163000 | 2.733000 | 2.3% | 1.7% | 1.6% | 1.9% | 1.9% | 1.8% | A | T | CC | 1.9% | 0.1% | 1.9% | 0.3% |
36 | TCCA | 211.807980 | 162.873940 | 79.380480 | 4.333000 | 4.096000 | 3.352000 | 2.7% | 2.1% | 2.0% | 2.4% | 2.5% | 2.2% | T | A | CC | 2.4% | 0.1% | 2.3% | 0.3% |
37 | CTCC | 153.281200 | 156.188210 | 85.739660 | 3.925000 | 3.353000 | 3.498000 | 2.0% | 2.0% | 2.2% | 2.2% | 2.1% | 2.3% | C | C | TC | 2.2% | 0.1% | 2.0% | 0.1% |
38 | GCTC | 47.614070 | 61.405840 | 22.717460 | 1.607000 | 1.696000 | 1.179000 | 0.6% | 0.8% | 0.6% | 0.9% | 1.0% | 0.8% | G | C | CT | 0.9% | 0.1% | 0.7% | 0.1% |
39 | TTCG | 128.267030 | 175.679870 | 89.567240 | 4.420000 | 4.090000 | 3.601000 | 1.7% | 2.2% | 2.3% | 2.5% | 2.5% | 2.4% | T | G | TC | 2.4% | 0.1% | 2.1% | 0.3% |
40 | CTTC | 63.171340 | 112.586500 | 69.748500 | 2.636000 | 2.562000 | 1.950000 | 0.8% | 1.4% | 1.8% | 1.5% | 1.6% | 1.3% | C | C | TT | 1.4% | 0.1% | 1.3% | 0.4% |
41 | ACTT | 46.323060 | 108.000540 | 46.373770 | 2.458000 | 2.198000 | 1.733000 | 0.6% | 1.4% | 1.2% | 1.4% | 1.4% | 1.1% | A | T | CT | 1.3% | 0.1% | 1.0% | 0.3% |
42 | TTCA | 259.959450 | 280.235080 | 122.166740 | 7.007000 | 6.517000 | 5.291000 | 3.3% | 3.5% | 3.1% | 3.9% | 4.0% | 3.5% | T | A | TC | 3.8% | 0.2% | 3.3% | 0.2% |
43 | TTTC | 163.210330 | 185.217910 | 92.152580 | 4.125000 | 3.789000 | 4.739000 | 2.1% | 2.3% | 2.4% | 2.3% | 2.3% | 3.1% | T | C | TT | 2.6% | 0.4% | 2.3% | 0.1% |
44 | GTTT | 43.621640 | 71.385900 | 39.923990 | 1.545000 | 1.483000 | 1.440000 | 0.6% | 0.9% | 1.0% | 0.9% | 0.9% | 1.0% | G | T | TT | 0.9% | 0.0% | 0.8% | 0.2% |
45 | ATTG | 115.781598 | 187.836782 | 68.942403 | 2.839360 | 3.290458 | 2.602073 | 1.5% | 2.4% | 1.8% | 1.6% | 2.0% | 1.7% | A | G | TT | 1.8% | 0.2% | 1.9% | 0.4% |
46 | CCCA | 170.207052 | 212.387274 | 81.669584 | 3.538504 | 4.077260 | 3.018794 | 2.2% | 2.7% | 2.1% | 2.0% | 2.5% | 2.0% | C | A | CC | 2.2% | 0.2% | 2.3% | 0.3% |
47 | CCCC | 29.958789 | 157.278120 | 55.261468 | 1.434228 | 1.823209 | 1.788980 | 0.4% | 2.0% | 1.4% | 0.8% | 1.1% | 1.2% | C | C | CC | 1.0% | 0.2% | 1.3% | 0.7% |
48 | ACCC | 114.276567 | 36.630414 | 22.434517 | 1.853543 | 1.999019 | 1.715545 | 1.5% | 0.5% | 0.6% | 1.0% | 1.2% | 1.1% | A | C | CC | 1.1% | 0.1% | 0.8% | 0.5% |
49 | CTTA | 187.945061 | 269.605715 | 95.545604 | 4.191437 | 4.416941 | 3.809767 | 2.4% | 3.4% | 2.4% | 2.3% | 2.7% | 2.5% | C | A | TT | 2.5% | 0.2% | 2.8% | 0.5% |
50 | TCTT | 86.597126 | 204.412618 | 45.210934 | 2.123958 | 2.071730 | 1.767746 | 1.1% | 2.6% | 1.2% | 1.2% | 1.3% | 1.2% | T | T | CT | 1.2% | 0.0% | 1.6% | 0.7% |
51 | ATCT | 132.147581 | 65.421903 | 73.218677 | 2.896695 | 3.309993 | 2.761329 | 1.7% | 0.8% | 1.9% | 1.6% | 2.0% | 1.8% | A | T | TC | 1.8% | 0.2% | 1.5% | 0.5% |
52 | GCCA | 75.617619 | 121.213170 | 46.835221 | 1.815890 | 2.230175 | 1.681925 | 1.0% | 1.5% | 1.2% | 1.0% | 1.4% | 1.1% | G | A | CC | 1.2% | 0.2% | 1.2% | 0.2% |
53 | TCTG | 49.625646 | 70.571358 | 39.782595 | 0.966136 | 1.495466 | 1.191769 | 0.6% | 0.9% | 1.0% | 0.5% | 0.9% | 0.8% | T | G | CT | 0.7% | 0.2% | 0.8% | 0.2% |
54 | TTCT | 146.360140 | 208.595291 | 96.355957 | 3.238137 | 3.775562 | 3.544340 | 1.9% | 2.6% | 2.5% | 1.8% | 2.3% | 2.3% | T | T | TC | 2.2% | 0.3% | 2.3% | 0.3% |
55 | ATTC | 94.178399 | 121.051848 | 51.226719 | 2.090584 | 2.245369 | 1.914616 | 1.2% | 1.5% | 1.3% | 1.2% | 1.4% | 1.3% | A | C | TT | 1.3% | 0.1% | 1.4% | 0.1% |
56 | GCTA | 70.201337 | 97.454615 | 38.411979 | 1.445353 | 1.677786 | 1.394378 | 0.9% | 1.2% | 1.0% | 0.8% | 1.0% | 0.9% | G | A | CT | 0.9% | 0.1% | 1.0% | 0.1% |
57 | ATCG | 92.790773 | 128.554705 | 50.632980 | 2.231782 | 2.250795 | 1.942043 | 1.2% | 1.6% | 1.3% | 1.2% | 1.4% | 1.3% | A | G | TC | 1.3% | 0.1% | 1.4% | 0.2% |
58 | GTTA | 203.342072 | 189.612783 | 70.451854 | 2.933492 | 2.973567 | 2.356110 | 2.6% | 2.4% | 1.8% | 1.6% | 1.8% | 1.6% | G | A | TT | 1.7% | 0.1% | 2.3% | 0.3% |
59 | TCCG | 168.170328 | 213.101819 | 79.705543 | 3.268944 | 3.856955 | 3.045336 | 2.2% | 2.7% | 2.0% | 1.8% | 2.4% | 2.0% | T | G | CC | 2.1% | 0.2% | 2.3% | 0.3% |
60 | TTCC | 230.667043 | 268.240322 | 113.053889 | 4.674077 | 4.817396 | 4.179596 | 3.0% | 3.4% | 2.9% | 2.6% | 3.0% | 2.8% | T | C | TC | 2.8% | 0.2% | 3.1% | 0.2% |
61 | GTTC | 104.373714 | 116.973540 | 49.050451 | 2.146207 | 2.534044 | 1.850913 | 1.3% | 1.5% | 1.3% | 1.2% | 1.6% | 1.2% | G | C | TT | 1.3% | 0.2% | 1.4% | 0.1% |
62 | GTCG | 53.409603 | 68.781144 | 28.329351 | 1.131294 | 1.280587 | 0.703382 | 0.7% | 0.9% | 0.7% | 0.6% | 0.8% | 0.5% | G | G | TC | 0.6% | 0.1% | 0.8% | 0.1% |
63 | GCTG | 64.706729 | 100.345658 | 34.297618 | 1.265646 | 1.942586 | 0.706921 | 0.8% | 1.3% | 0.9% | 0.7% | 1.2% | 0.5% | G | G | CT | 0.8% | 0.3% | 1.0% | 0.2% |
/Users/Chen/Documents/research/all data/data analysis/NB UVB.csv
4mers | origin1 | origin2 | origin3 | quantity1 | quantity2 | quantity3 | normalized origin1 | normalized origin2 | normalized origin3 | normalized quantity1 | normalized quantity2 | normalized quantity3 | 5end | 3end | CPD | quanti mean | quanti std | origin mean | origin std | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | GTTG | 25.330738 | 32.552377 | 29.499206 | 1.754515 | 0.836927 | 1.264996 | 0.4% | 0.6% | 0.9% | 1.0% | 0.6% | 0.8% | G | G | TT | 0.8% | 0.2% | 0.6% | 0.2% |
1 | ACCG | 88.020967 | 67.662505 | 57.801266 | 2.695238 | 1.835261 | 2.309552 | 1.5% | 1.3% | 1.7% | 1.5% | 1.4% | 1.5% | A | G | CC | 1.5% | 0.1% | 1.5% | 0.2% |
2 | TCTA | 162.249970 | 71.271443 | 50.110593 | 3.780294 | 1.918954 | 2.212478 | 2.7% | 1.3% | 1.4% | 2.2% | 1.4% | 1.5% | T | A | CT | 1.7% | 0.3% | 1.8% | 0.6% |
3 | CTCT | 45.851506 | 97.077087 | 90.981378 | 3.985022 | 2.155087 | 3.102323 | 0.8% | 1.8% | 2.6% | 2.3% | 1.6% | 2.0% | C | T | TC | 2.0% | 0.3% | 1.7% | 0.8% |
4 | ACTC | 71.315987 | 21.351595 | 19.014187 | 1.481204 | 0.993104 | 1.461166 | 1.2% | 0.4% | 0.5% | 0.8% | 0.7% | 1.0% | A | C | CT | 0.9% | 0.1% | 0.7% | 0.4% |
5 | GTCA | 18.922937 | 34.428317 | 30.425975 | 1.285689 | 0.659827 | 1.201291 | 0.3% | 0.6% | 0.9% | 0.7% | 0.5% | 0.8% | G | A | TC | 0.7% | 0.1% | 0.6% | 0.2% |
6 | CTCG | 50.694604 | 96.032208 | 85.056988 | 3.923604 | 2.516012 | 3.238833 | 0.9% | 1.8% | 2.5% | 2.3% | 1.9% | 2.1% | C | G | TC | 2.1% | 0.2% | 1.7% | 0.7% |
7 | TCTC | 52.568620 | 41.711657 | 37.195671 | 1.741208 | 0.804048 | 1.147698 | 0.9% | 0.8% | 1.1% | 1.0% | 0.6% | 0.8% | T | C | CT | 0.8% | 0.2% | 0.9% | 0.1% |
8 | GTCT | 163.304129 | 27.588033 | 20.264566 | 1.474038 | 1.034203 | 1.524871 | 2.8% | 0.5% | 0.6% | 0.8% | 0.8% | 1.0% | G | T | TC | 0.9% | 0.1% | 1.3% | 1.0% |
9 | TTTG | 117.232921 | 110.228439 | 93.257704 | 4.811098 | 3.187048 | 3.908240 | 2.0% | 2.0% | 2.7% | 2.8% | 2.4% | 2.6% | T | G | TT | 2.6% | 0.2% | 2.2% | 0.3% |
10 | TTTT | 67.664017 | 56.301531 | 46.036946 | 2.440352 | 1.531875 | 2.019341 | 1.1% | 1.0% | 1.3% | 1.4% | 1.1% | 1.3% | T | T | TT | 1.3% | 0.1% | 1.2% | 0.1% |
11 | ATTT | 26.464673 | 27.223727 | 21.849122 | 1.353249 | 0.842905 | 1.257917 | 0.4% | 0.5% | 0.6% | 0.8% | 0.6% | 0.8% | A | T | TT | 0.7% | 0.1% | 0.5% | 0.1% |
12 | ACTA | 64.026096 | 51.374090 | 39.398485 | 2.196727 | 1.445194 | 1.646213 | 1.1% | 1.0% | 1.1% | 1.3% | 1.1% | 1.1% | A | A | CT | 1.1% | 0.1% | 1.1% | 0.1% |
13 | ATCA | 85.592236 | 67.252037 | 56.548662 | 2.632796 | 1.828536 | 2.328765 | 1.4% | 1.2% | 1.6% | 1.5% | 1.4% | 1.5% | A | A | TC | 1.5% | 0.1% | 1.4% | 0.2% |
14 | CCTA | 102.644344 | 64.550629 | 54.383534 | 3.174301 | 1.756800 | 2.116415 | 1.7% | 1.2% | 1.6% | 1.8% | 1.3% | 1.4% | C | A | CT | 1.5% | 0.2% | 1.5% | 0.2% |
15 | CCCT | 75.769261 | 64.188821 | 54.050116 | 2.361532 | 1.664140 | 1.953614 | 1.3% | 1.2% | 1.6% | 1.4% | 1.2% | 1.3% | C | T | CC | 1.3% | 0.0% | 1.3% | 0.2% |
16 | GCCC | 48.091683 | 35.719993 | 31.833919 | 1.414667 | 0.988620 | 1.474311 | 0.8% | 0.7% | 0.9% | 0.8% | 0.7% | 1.0% | G | C | CC | 0.8% | 0.1% | 0.8% | 0.1% |
17 | CCCG | 106.349066 | 48.505925 | 48.175418 | 1.911132 | 1.194116 | 1.617900 | 1.8% | 0.9% | 1.4% | 1.1% | 0.9% | 1.1% | C | G | CC | 1.0% | 0.1% | 1.4% | 0.4% |
18 | TCCC | 52.342032 | 54.355977 | 52.002684 | 2.152710 | 1.388402 | 1.887887 | 0.9% | 1.0% | 1.5% | 1.2% | 1.0% | 1.2% | T | C | CC | 1.2% | 0.1% | 1.1% | 0.3% |
19 | GTCC | 40.124890 | 28.864690 | 26.211041 | 1.055371 | 0.692707 | 0.819062 | 0.7% | 0.5% | 0.8% | 0.6% | 0.5% | 0.5% | G | C | TC | 0.6% | 0.0% | 0.7% | 0.1% |
20 | GCCG | 51.438897 | 34.593206 | 32.286525 | 1.160805 | 0.839169 | 0.916136 | 0.9% | 0.6% | 0.9% | 0.7% | 0.6% | 0.6% | G | G | CC | 0.6% | 0.0% | 0.8% | 0.1% |
21 | ACTG | 27.465550 | 74.601010 | 41.859360 | 1.903000 | 1.044000 | 1.444000 | 0.5% | 1.4% | 1.2% | 1.1% | 0.8% | 1.0% | A | G | CT | 0.9% | 0.1% | 1.0% | 0.4% |
22 | ACCA | 93.667579 | 77.872440 | 65.039750 | 3.122000 | 3.198000 | 2.889000 | 1.6% | 1.4% | 1.9% | 1.8% | 2.4% | 1.9% | A | A | CC | 2.0% | 0.3% | 1.6% | 0.2% |
23 | ATTA | 134.620158 | 105.852150 | 105.529290 | 5.873000 | 2.532000 | 5.108000 | 2.3% | 2.0% | 3.0% | 3.4% | 1.9% | 3.4% | A | A | TT | 2.9% | 0.7% | 2.4% | 0.5% |
24 | TTTA | 285.863999 | 168.655910 | 156.118930 | 7.827000 | 5.467000 | 8.241000 | 4.8% | 3.1% | 4.5% | 4.5% | 4.1% | 5.4% | T | A | TT | 4.7% | 0.6% | 4.2% | 0.7% |
25 | CTTT | 183.002295 | 122.884010 | 114.257070 | 3.815000 | 3.192000 | 3.636000 | 3.1% | 2.3% | 3.3% | 2.2% | 2.4% | 2.4% | C | T | TT | 2.3% | 0.1% | 2.9% | 0.4% |
26 | GCTT | 54.117350 | 37.834770 | 28.664340 | 2.345000 | 1.420000 | 2.760000 | 0.9% | 0.7% | 0.8% | 1.3% | 1.1% | 1.8% | G | T | CT | 1.4% | 0.3% | 0.8% | 0.1% |
27 | CCTG | 30.007320 | 35.595130 | 35.829670 | 1.813000 | 3.609000 | 3.076000 | 0.5% | 0.7% | 1.0% | 1.0% | 2.7% | 2.0% | C | G | CT | 1.9% | 0.7% | 0.7% | 0.2% |
28 | TCCT | 116.396340 | 82.258870 | 63.473480 | 2.973000 | 2.472000 | 3.399000 | 2.0% | 1.5% | 1.8% | 1.7% | 1.9% | 2.2% | T | T | CC | 1.9% | 0.2% | 1.8% | 0.2% |
29 | ATCC | 97.970280 | 84.408610 | 56.474590 | 3.031000 | 1.739000 | 1.610000 | 1.7% | 1.6% | 1.6% | 1.7% | 1.3% | 1.1% | A | C | TC | 1.4% | 0.3% | 1.6% | 0.0% |
30 | CTCA | 174.943370 | 133.441580 | 103.278890 | 5.200000 | 3.986000 | 5.095000 | 3.0% | 2.5% | 3.0% | 3.0% | 3.0% | 3.4% | C | A | TC | 3.1% | 0.2% | 2.8% | 0.2% |
31 | CCTC | 46.226460 | 41.802180 | 36.976580 | 1.707000 | 1.464000 | 1.633000 | 0.8% | 0.8% | 1.1% | 1.0% | 1.1% | 1.1% | C | C | CT | 1.1% | 0.1% | 0.9% | 0.1% |
32 | GCCT | 37.999180 | 42.646480 | 22.844000 | 1.899000 | 1.691000 | 1.271000 | 0.6% | 0.8% | 0.7% | 1.1% | 1.3% | 0.8% | G | T | CC | 1.1% | 0.2% | 0.7% | 0.1% |
33 | CTTG | 75.502720 | 53.956910 | 48.573480 | 2.681000 | 1.909000 | 1.768000 | 1.3% | 1.0% | 1.4% | 1.5% | 1.4% | 1.2% | C | G | TT | 1.4% | 0.2% | 1.2% | 0.2% |
34 | CCTT | 49.809200 | 41.320670 | 37.332990 | 1.582000 | 1.477000 | 1.664000 | 0.8% | 0.8% | 1.1% | 0.9% | 1.1% | 1.1% | C | T | CT | 1.0% | 0.1% | 0.9% | 0.1% |
35 | ACCT | 98.591210 | 80.259520 | 48.013900 | 3.114000 | 2.471000 | 1.431000 | 1.7% | 1.5% | 1.4% | 1.8% | 1.9% | 0.9% | A | T | CC | 1.5% | 0.4% | 1.5% | 0.1% |
36 | TCCA | 137.533200 | 102.074510 | 70.119940 | 3.868000 | 3.389000 | 3.405000 | 2.3% | 1.9% | 2.0% | 2.2% | 2.5% | 2.2% | T | A | CC | 2.3% | 0.1% | 2.1% | 0.2% |
37 | CTCC | 130.515650 | 97.884400 | 77.174680 | 3.888000 | 2.993000 | 3.787000 | 2.2% | 1.8% | 2.2% | 2.2% | 2.2% | 2.5% | C | C | TC | 2.3% | 0.1% | 2.1% | 0.2% |
38 | GCTC | 25.451250 | 31.130840 | 20.857510 | 1.427000 | 1.049000 | 1.336000 | 0.4% | 0.6% | 0.6% | 0.8% | 0.8% | 0.9% | G | C | CT | 0.8% | 0.0% | 0.5% | 0.1% |
39 | TTCG | 114.222640 | 113.120770 | 79.971060 | 4.411000 | 3.549000 | 2.324000 | 1.9% | 2.1% | 2.3% | 2.5% | 2.7% | 1.5% | T | G | TC | 2.2% | 0.5% | 2.1% | 0.2% |
40 | CTTC | 59.004350 | 70.106500 | 65.656320 | 2.564000 | 2.104000 | 2.003000 | 1.0% | 1.3% | 1.9% | 1.5% | 1.6% | 1.3% | C | C | TT | 1.5% | 0.1% | 1.4% | 0.4% |
41 | ACTT | 38.997490 | 67.877770 | 42.343880 | 2.402000 | 1.679000 | 1.818000 | 0.7% | 1.3% | 1.2% | 1.4% | 1.3% | 1.2% | A | T | CT | 1.3% | 0.1% | 1.0% | 0.3% |
42 | TTCA | 218.981510 | 173.590080 | 109.084940 | 6.528000 | 5.838000 | 6.387000 | 3.7% | 3.2% | 3.1% | 3.7% | 4.4% | 4.2% | T | A | TC | 4.1% | 0.3% | 3.4% | 0.2% |
43 | TTTC | 144.734660 | 123.165650 | 86.440580 | 4.345000 | 3.147000 | 3.292000 | 2.4% | 2.3% | 2.5% | 2.5% | 2.4% | 2.2% | T | C | TT | 2.3% | 0.1% | 2.4% | 0.1% |
44 | GTTT | 44.948630 | 39.057430 | 37.248270 | 1.541000 | 1.465000 | 1.444000 | 0.8% | 0.7% | 1.1% | 0.9% | 1.1% | 1.0% | G | T | TT | 1.0% | 0.1% | 0.9% | 0.2% |
45 | ATTG | 114.126677 | 104.247500 | 66.130970 | 3.051393 | 2.546122 | 2.868134 | 1.9% | 1.9% | 1.9% | 1.8% | 1.9% | 1.9% | A | G | TT | 1.9% | 0.1% | 1.9% | 0.0% |
46 | CCCA | 150.011746 | 163.909636 | 67.981976 | 3.476966 | 3.012389 | 2.660514 | 2.5% | 3.0% | 2.0% | 2.0% | 2.3% | 1.8% | C | A | CC | 2.0% | 0.2% | 2.5% | 0.4% |
47 | CCCC | 43.677904 | 74.903700 | 53.670889 | 1.390089 | 1.989191 | 1.615406 | 0.7% | 1.4% | 1.5% | 0.8% | 1.5% | 1.1% | C | C | CC | 1.1% | 0.3% | 1.2% | 0.4% |
48 | ACCC | 74.647771 | 77.981517 | 10.725732 | 1.784267 | 2.021571 | 2.231258 | 1.3% | 1.4% | 0.3% | 1.0% | 1.5% | 1.5% | A | C | CC | 1.3% | 0.2% | 1.0% | 0.5% |
49 | CTTA | 179.369340 | 219.294514 | 87.306911 | 4.339450 | 3.690203 | 3.863309 | 3.0% | 4.1% | 2.5% | 2.5% | 2.8% | 2.6% | C | A | TT | 2.6% | 0.1% | 3.2% | 0.6% |
50 | TCTT | 79.472533 | 143.429299 | 38.173938 | 2.146179 | 2.125186 | 1.958811 | 1.3% | 2.7% | 1.1% | 1.2% | 1.6% | 1.3% | T | T | CT | 1.4% | 0.2% | 1.7% | 0.7% |
51 | ATCT | 116.363350 | 119.710926 | 61.347383 | 2.801108 | 2.398254 | 2.703440 | 2.0% | 2.2% | 1.8% | 1.6% | 1.8% | 1.8% | A | T | TC | 1.7% | 0.1% | 2.0% | 0.2% |
52 | GCCA | 63.458329 | 85.878798 | 39.988031 | 1.816534 | 2.511583 | 1.613654 | 1.1% | 1.6% | 1.2% | 1.0% | 1.9% | 1.1% | G | A | CC | 1.3% | 0.4% | 1.3% | 0.2% |
53 | TCTG | 45.784430 | 73.712545 | 29.566623 | 0.926145 | 1.159192 | 1.111687 | 0.8% | 1.4% | 0.9% | 0.5% | 0.9% | 0.7% | T | G | CT | 0.7% | 0.1% | 1.0% | 0.3% |
54 | TTCT | 128.475926 | 152.011295 | 73.815905 | 3.143834 | 2.633547 | 3.039837 | 2.2% | 2.8% | 2.1% | 1.8% | 2.0% | 2.0% | T | T | TC | 1.9% | 0.1% | 2.4% | 0.3% |
55 | ATTC | 90.413589 | 92.965009 | 45.221743 | 2.256060 | 1.465720 | 2.946977 | 1.5% | 1.7% | 1.3% | 1.3% | 1.1% | 1.9% | A | C | TT | 1.4% | 0.4% | 1.5% | 0.2% |
56 | GCTA | 60.128581 | 80.100394 | 34.145873 | 1.447646 | 1.172144 | 2.401208 | 1.0% | 1.5% | 1.0% | 0.8% | 0.9% | 1.6% | G | A | CT | 1.1% | 0.3% | 1.2% | 0.2% |
57 | ATCG | 81.054713 | 100.241101 | 40.915822 | 2.221177 | 1.531559 | 1.681985 | 1.4% | 1.9% | 1.2% | 1.3% | 1.1% | 1.1% | A | G | TC | 1.2% | 0.1% | 1.5% | 0.3% |
58 | GTTA | 197.726361 | 141.527828 | 59.521099 | 2.962442 | 2.055030 | 2.169059 | 3.3% | 2.6% | 1.7% | 1.7% | 1.5% | 1.4% | G | A | TT | 1.6% | 0.1% | 2.6% | 0.7% |
59 | TCCG | 150.239553 | 160.610002 | 54.844550 | 3.096741 | 2.839697 | 2.292580 | 2.5% | 3.0% | 1.6% | 1.8% | 2.1% | 1.5% | T | G | CC | 1.8% | 0.3% | 2.4% | 0.6% |
60 | TTCC | 203.838401 | 214.636336 | 82.509649 | 4.455436 | 3.738773 | 3.483110 | 3.4% | 4.0% | 2.4% | 2.6% | 2.8% | 2.3% | T | C | TC | 2.6% | 0.2% | 3.3% | 0.7% |
61 | GTTC | 97.085824 | 98.333457 | 38.633978 | 2.105191 | 1.124654 | 1.110811 | 1.6% | 1.8% | 1.1% | 1.2% | 0.8% | 0.7% | G | C | TT | 0.9% | 0.2% | 1.5% | 0.3% |
62 | GTCG | 46.242822 | 58.577817 | 22.084397 | 1.006376 | 0.728543 | 0.637753 | 0.8% | 1.1% | 0.6% | 0.6% | 0.5% | 0.4% | G | G | TC | 0.5% | 0.1% | 0.8% | 0.2% |
63 | GCTG | 50.887918 | 103.665326 | 27.535602 | 1.241836 | 1.574731 | 0.749009 | 0.9% | 1.9% | 0.8% | 0.7% | 1.2% | 0.5% | G | G | CT | 0.8% | 0.3% | 1.2% | 0.5% |
%matplotlib inline
# Build and display the normalized 4-mer table for every irradiation
# condition; each condition is read from "<name>.csv" under `filepath`.
names = ["UVC","BB UVB","FiltBB UVB","NB UVB"]
filepath = '/Users/Chen/Documents/research/all data/data analysis/'
for name in names:
    filename = filepath + '%s.csv' % name
    # construct_df prints the file name, renders the table as HTML and
    # returns the frame. `df` is rebound on every pass, so after the loop
    # it holds only the last condition in `names`.
    df = construct_df(filename)
def transform_df_sensitizer(df, PP, sensitizer):
    """Arrange one sensitizer's mean signal for a dimer into a 4x4 table.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns '5end', '3end', 'CPD' and
        ``sensitizer + ' mean'``.
    PP : str
        Value of the 'CPD' column to keep (e.g. 'TT').
    sensitizer : str
        Prefix of the mean column to read (e.g. 'acetone' or 'nfx').

    Returns
    -------
    pandas.DataFrame
        4x4 frame of the selected mean values. Rows follow the sorted
        '5end' values and columns the sorted '3end' values; every row and
        column label carries its marginal sum formatted as a whole percent.

    Raises
    ------
    ValueError
        If the selection does not contain exactly 16 values (raised by
        the reshape).
    """
    names = ['A', 'C', 'G', 'T']
    # sort_values / .values.reshape replace the deprecated DataFrame.sort
    # and Series.reshape calls.
    selected = df[df['CPD'] == PP].sort_values(by=['5end', '3end'],
                                               ascending=True)
    # NOTE(review): the A/C/G/T labels only line up with the data if every
    # (5end, 3end) combination occurs exactly once for this dimer -- confirm
    # against the input file.
    pypy = pd.DataFrame(selected[sensitizer + ' mean'].values.reshape(4, 4))

    # Marginal totals: column sums belong to the 3' base, row sums to the
    # 5' base.
    three_ends = pypy.sum(axis=0)
    five_ends = pypy.sum(axis=1)

    pypy.columns = [name + ': {:.0%}'.format(three_end)
                    for (name, three_end) in zip(names, three_ends)]
    pypy.index = [name + ': {:.0%}'.format(five_end)
                  for (name, five_end) in zip(names, five_ends)]
    pypy.index.name = '5\'X'
    pypy.columns.name = '3\'Y'
    return pypy
# Read the sensitizer lane quantification: three acetone replicates and
# three nfx replicates per 4-mer. The file's own first row is skipped and
# replaced by the column names given here.
filepath = '/Users/Chen/Documents/research/all data/data analysis/'
filename = filepath + '%s.csv' %('sensitizer_lane_singlebandnorm')
sensitizer = pd.read_csv(
    filename,
    skiprows=1,
    names=['4mers', 'acetone1', 'acetone2', 'acetone3', 'nfx1', 'nfx2', 'nfx3'],
)

# Construct the full table: per-4-mer mean and standard deviation across
# the three replicates of each sensitizer (np.std defaults to ddof=0).
sensitizer["acetone mean"] = np.mean(sensitizer[["acetone1",'acetone2','acetone3']], axis = 1)
sensitizer["nfx mean"] = np.mean(sensitizer[["nfx1",'nfx2','nfx3']], axis = 1)
sensitizer["acetone std"] = np.std(sensitizer[["acetone1",'acetone2','acetone3']], axis = 1)
sensitizer["nfx std"] = np.std(sensitizer[["nfx1",'nfx2','nfx3']], axis = 1)

# Flanking bases (first and last letter) and the central dinucleotide.
sensitizer['5end'] = sensitizer['4mers'].apply( lambda x: x[0])
sensitizer['3end'] = sensitizer['4mers'].apply( lambda x: x[-1])
# sort_values replaces the deprecated DataFrame.sort(columns=...).
sensitizer = sensitizer.sort_values(by=['5end','3end'], ascending=True)
sensitizer['CPD'] = sensitizer['4mers'].apply( lambda x: x[1:3])

# Show replicates and means as percentages. This must stay the last
# expression of the cell so the notebook renders the styled table.
percent_columns = ['acetone1', 'acetone2', 'acetone3',
                   'nfx1', 'nfx2', 'nfx3',
                   'acetone mean', 'nfx mean']
sensitizer.style.format({column: '{:,.1%}'.format for column in percent_columns})
/Users/Chen/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:13: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
4mers | acetone1 | acetone2 | acetone3 | nfx1 | nfx2 | nfx3 | acetone mean | nfx mean | acetone std | nfx std | 5end | 3end | CPD | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
4 | ATTA | 9.4% | 19.3% | 10.7% | 4.8% | 2.9% | 3.1% | 13.1% | 3.6% | 0.0438034 | 0.00849512 | A | A | TT |
13 | ATTC | 7.9% | 3.8% | 7.6% | 7.1% | 7.4% | 6.9% | 6.4% | 7.2% | 0.0189469 | 0.00224729 | A | C | TT |
11 | ATTG | 5.0% | 1.8% | 4.8% | 11.9% | 11.9% | 14.6% | 3.9% | 12.8% | 0.0144499 | 0.0125494 | A | G | TT |
3 | ATTT | 0.8% | 1.2% | 1.7% | 1.9% | 2.8% | 1.8% | 1.2% | 2.2% | 0.00381707 | 0.0041309 | A | T | TT |
12 | CTTA | 6.2% | 3.1% | 6.6% | 10.6% | 11.0% | 9.6% | 5.3% | 10.4% | 0.0157305 | 0.00555296 | C | A | TT |
8 | CTTC | 6.1% | 7.3% | 5.2% | 2.0% | 1.4% | 1.6% | 6.2% | 1.7% | 0.00881926 | 0.00218635 | C | C | TT |
7 | CTTG | 5.0% | 6.3% | 4.8% | 3.6% | 2.0% | 2.0% | 5.4% | 2.5% | 0.00652288 | 0.00727735 | C | G | TT |
6 | CTTT | 4.3% | 6.8% | 5.4% | 5.3% | 2.1% | 8.3% | 5.5% | 5.2% | 0.0103573 | 0.0252486 | C | T | TT |
14 | GTTA | 4.8% | 2.2% | 5.8% | 9.3% | 9.7% | 8.8% | 4.3% | 9.2% | 0.0150076 | 0.00370459 | G | A | TT |
15 | GTTC | 5.7% | 2.6% | 6.2% | 2.3% | 1.2% | 1.8% | 4.8% | 1.8% | 0.0159415 | 0.00448104 | G | C | TT |
0 | GTTG | 7.3% | 2.8% | 4.4% | 7.3% | 4.3% | 2.7% | 4.8% | 4.7% | 0.0188525 | 0.0193397 | G | G | TT |
10 | GTTT | 3.0% | 1.8% | 1.6% | 1.1% | 1.3% | 1.5% | 2.1% | 1.3% | 0.00619467 | 0.00135626 | G | T | TT |
5 | TTTA | 11.3% | 11.6% | 11.0% | 15.0% | 21.1% | 18.9% | 11.3% | 18.4% | 0.00281232 | 0.0251228 | T | A | TT |
9 | TTTC | 8.3% | 9.5% | 6.6% | 3.6% | 1.8% | 2.8% | 8.1% | 2.8% | 0.0121271 | 0.00750034 | T | C | TT |
1 | TTTG | 11.6% | 17.7% | 15.2% | 10.4% | 15.6% | 13.2% | 14.8% | 13.0% | 0.025302 | 0.0211609 | T | G | TT |
2 | TTTT | 3.2% | 2.1% | 2.6% | 3.7% | 3.6% | 2.5% | 2.6% | 3.2% | 0.00452555 | 0.00540667 | T | T | TT |
# Grouped bar chart of the mean signal per 4-mer for both sensitizers,
# ordered by decreasing acetone signal, saved as a PDF under `filepath`.
max_y = 0.22  # upper y-axis limit, as a fraction (tick labels show percent)
# sort_values replaces the deprecated DataFrame.sort(columns=...).
sensitizer = sensitizer.sort_values(by=["acetone mean"], ascending=False)
fig = plt.figure()
ax = fig.add_subplot(111)

## the data: one bar group per row of the table
N = len(sensitizer)

## necessary variables
ind = np.arange(N)                # the x locations for the groups
width = 0.4                       # the width of the bars

## the bars
# align='edge' pins each bar's left edge to the given x, so the two bars of
# a group meet at ind + 2*width (where the tick is placed) regardless of the
# matplotlib version's default alignment. Plain arrays are passed so the
# bars follow the sorted row order rather than the frame's index labels.
rects1 = ax.bar(ind + width, sensitizer["acetone mean"].values, width,
                align='edge',
                color='black',
                yerr=sensitizer["acetone std"].values,
                error_kw=dict(elinewidth=0.5, ecolor='red'))

rects2 = ax.bar(ind + 2*width, sensitizer["nfx mean"].values, width,
                align='edge',
                color='red',
                yerr=sensitizer["nfx std"].values,
                error_kw=dict(elinewidth=0.5, ecolor='black'))

# axes and labels
ax.set_xlim(0, len(ind) + width)
ax.set_ylim(0, max_y)
ax.set_ylabel('normalized relative Qty', fontsize = 20)
ax.set_title("Photosensitizer Effect on TT CPD Distribution", fontsize = 20)
xTickMarks = sensitizer['4mers']
ax.set_xticks(ind + 2*width)
xtickNames = ax.set_xticklabels(xTickMarks)
plt.setp(xtickNames, rotation=90)

# Relabel the y ticks (fractions) as whole percentages.
vals = ax.get_yticks()
ax.set_yticklabels(['{:.0f}%'.format(x*100) for x in vals])
plt.tick_params(axis='both', which='major', labelsize=14)
plt.tick_params(axis='both', which='minor', labelsize=14)

## add a legend
ax.legend( (rects1[0], rects2[0]), ("acetone ","norfloxacin"), fontsize = 14 )
filename = "sensitizer"
plt.savefig(filepath + '%s_barplot.pdf'%(filename), format = 'pdf',dpi=300)
/Users/Chen/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:2: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....) from ipykernel import kernelapp as app
# Side-by-side heatmaps of the TT signal by 5'/3' flanking base for the two
# sensitizers, saved as a PDF under `filepath`.
max_y = 0.2  # upper end of the shared colour scale (fraction)
acetone = transform_df_sensitizer(sensitizer, 'TT', 'acetone')
nfx = transform_df_sensitizer(sensitizer, 'TT', 'nfx')
fig = plt.figure(figsize=(32, 24))
fig.subplots_adjust(wspace=.3)
all_data = [acetone, nfx]
names = ["2.7M acetone + 20s NB UVB","300uM Norfloxacin + 30min UVA"]

# The font scale is the same for every panel, so set it once before drawing.
sns.set(font_scale=3.5)
for i, (title, data) in enumerate(zip(names, all_data)):
    # Panels fill a 2x2 grid from the top-left; only the first two are used.
    ax = fig.add_subplot(2, 2, i + 1)
    # Pass the axes explicitly instead of relying on the "current axes".
    sns.heatmap(data, vmin=0, vmax=max_y, fmt='.0%', annot=True,
                annot_kws={"size": 40}, ax=ax)
    # Sum of all 16 cells of the panel, shown in its title.
    total = data.values.sum()
    ax.set_title(title + '\n TT : {:.0%}'.format(total), fontsize = 40)

#filename = 'sensitizer effect'
filename = 'sensitizer'
plt.savefig(filepath + '%s_HEATMAP.pdf'%(filename), format = 'pdf',dpi=300)
/Users/Chen/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:3: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....) app.launch_new_instance() /Users/Chen/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:3: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) instead app.launch_new_instance()