Direct Excitation

In [ ]:
import pandas as pd
import numpy as np
from IPython.core.display import display, HTML
import matplotlib.pyplot as plt
import seaborn as sns
In [ ]:
def construct_df(filename):
    
    df = pd.read_csv(filename,skiprows = 1 ,names = ['4mers','origin1','origin2','origin3','quantity1','quantity2','quantity3'])

    
    origin1 = df['origin1']/df['origin1'].sum()
    df['normalized origin1'] = origin1
    origin2 = df['origin2']/df['origin2'].sum()
    df['normalized origin2'] = origin2
    origin3 = df['origin3']/df['origin3'].sum()
    df['normalized origin3'] = origin3
    
    quantity1 = df['quantity1']/df['quantity1'].sum()
    df['normalized quantity1'] = quantity1
    quantity2 = df['quantity2']/df['quantity2'].sum()
    df['normalized quantity2'] = quantity2
    quantity3 = df['quantity3']/df['quantity3'].sum()
    df['normalized quantity3'] = quantity3
    df['5end'] = df['4mers'].apply( lambda x: x[0])
    df['3end'] = df['4mers'].apply( lambda x: x[-1])
    df['CPD'] = df['4mers'].apply( lambda x: x[1:3])
    
    
    
    df["quanti mean"] = np.mean(df[["normalized quantity1",'normalized quantity2','normalized quantity3']], axis = 1)
    df["quanti std"] = np.std(df[["normalized quantity1",'normalized quantity2','normalized quantity3']], axis = 1)
    
  
    df["origin mean"] = np.mean(df[["normalized origin1",'normalized origin2','normalized origin3']], axis = 1)
    df["origin std"] = np.std(df[["normalized origin1",'normalized origin2','normalized origin3']], axis = 1)
    
  
    output = df.to_html(formatters={
    'normalized origin1': '{:,.1%}'.format,
    'normalized origin2': '{:,.1%}'.format,
    'normalized origin3': '{:,.1%}'.format,
    'normalized quantity1': '{:,.1%}'.format,
    'normalized quantity2': '{:,.1%}'.format,
    'normalized quantity3': '{:,.1%}'.format,
    'quanti mean': '{:,.1%}'.format,
    'quanti std': '{:,.1%}'.format,
    'origin mean': '{:,.1%}'.format,
    'origin std': '{:,.1%}'.format  
    })
    print filename
    display(HTML(output))
    
    return df
In [ ]:
def transform_df(df, PP):
    names = ['A','C','G','T']
    pypy = df[df['CPD'] == PP].sort(['5end','3end'], ascending=True)['quanti mean'].reshape(4,4)
    pypy = pd.DataFrame(pypy)
    three_ends = pypy.sum(axis = 0)
    five_ends = pypy.sum(axis = 1)
    columns = [ name + ': {:.0%}'.format(three_end) for (name , three_end)in zip(names , three_ends)]
    pypy.columns = columns
    index = [ name + ': {:.0%}'.format(five_end) for (name , five_end)in zip(names , five_ends)]
    pypy.index = index
    pypy.index.name = '5\'X'
    pypy.columns.name = '3\'Y'
    return pypy
In [ ]:
def plt_CPD_HEATMAP(df, uv, filepath):
#     df["quanti mean"] = np.mean(df[["normalized quantity1",'normalized quantity2','normalized quantity3']], axis = 1)
#     df["quanti std"] = np.std(df[["normalized quantity1",'normalized quantity2','normalized quantity3']], axis = 1)

    max_y = 0.07
 
    tt = transform_df(df, 'TT')
    tc = transform_df(df,'TC')
    ct = transform_df(df, 'CT')
    cc = transform_df(df,'CC')
    all_data = [tt, tc, ct ,cc]
    
    
    fig = plt.figure(figsize=(40, 7))
    i = 0
    fig.subplots_adjust(wspace=.3, top = 0.85)
   
    fig.suptitle(uv , fontsize = 30)
    
    CPD = ['TT','TC','CT','CC'] 
    for data in all_data :
        sns.set(font_scale=2)
        ax = fig.add_subplot(141 + i)
        ax = sns.heatmap(data,vmin=0, vmax=max_y, annot=True, fmt = '.1%',annot_kws={"size":30})
        total = np.sum(np.sum(data))
        ax.set_title(CPD[i] + ' : {:.0%}'.format(total), fontsize = 30)
        i=i+1
    filename = uv 
    plt.savefig(filepath + '%s_HEATMAP.pdf'%(filename), format = 'pdf',dpi=300)  
    
In [79]:
def plt_total(df, uv, file_path):
    max_y = np.max(df["quanti mean"]+ 0.01)
    df = df.sort(["quanti mean"], ascending=False)

    fig = plt.figure()
    ax = fig.add_subplot(111)

    ## the data
    N = 64 

    ## necessary variables
    ind = np.arange(N)             # the x locations for the groups
    width = 0.4                      # the width of the bars

    ## the bars
   
    rects = ax.bar(ind+width, df["quanti mean"], width,
                    color='grey',
                    yerr= df["quanti std"],
                    error_kw=dict(elinewidth=0.5,ecolor='red'))

    # axes and labels
    ax.set_xlim(0,len(ind)+width)
    ax.set_ylim(0, max_y)
    ax.set_ylabel('normalized relative Qty', fontsize = 12)
    ax.set_title(uv, fontsize = 20)
    
    xTickMarks = df['4mers']
    ax.set_xticks(ind+2*width)
    xtickNames = ax.set_xticklabels(xTickMarks)  
    plt.setp(xtickNames, rotation=90)
    
    vals = ax.get_yticks()
    ax.set_yticklabels(['{:.0f}%'.format(x*100) for x in vals])
    
    plt.tick_params(axis='both', which='major', labelsize=7)
    plt.tick_params(axis='both', which='minor', labelsize=7)

    filename = uv 
    plt.savefig(file_path + '%s_barplot.pdf'%(filename), format = 'pdf',dpi=300) 

show dataframe and plot heatmap and barplot for UVC, BB UVB, FiltBB UVB, and NB UVB

In [80]:
%matplotlib inline
names = ["UVC","BB UVB","FiltBB UVB","NB UVB"]
for j in range(len(names)):
    filepath = '/Users/Chen/Documents/research/all data/data analysis/'
    filename = filepath + '%s.csv' %(names[j])
    df = construct_df(filename)
    #max_c_containing_cpd_yield = np.max(df[df['CPD'] != 'TT' ]['quanti mean']) 
    #max_c_containing_cpd = df[df['quanti mean'] == max_c_containing_cpd_yield ]['4mers']
    #print filename, max_c_containing_cpd
    plt_CPD_HEATMAP(df, names[j], filepath)
    plt_total(df, names[j], filepath)
/Users/Chen/Documents/research/all data/data analysis/UVC.csv
4mers origin1 origin2 origin3 quantity1 quantity2 quantity3 normalized origin1 normalized origin2 normalized origin3 normalized quantity1 normalized quantity2 normalized quantity3 5end 3end CPD quanti mean quanti std origin mean origin std
0 GTTG 98.232330 112.397776 72.249041 1.894604 1.155363 1.227031 0.6% 0.6% 0.7% 1.0% 0.6% 0.7% G G TT 0.8% 0.2% 0.6% 0.0%
1 ACCG 45.454714 100.246222 71.285144 1.091761 0.894071 0.989051 0.3% 0.6% 0.7% 0.6% 0.5% 0.6% A G CC 0.6% 0.0% 0.5% 0.2%
2 TCTA 325.303726 270.059716 100.550238 3.011755 2.076148 1.976414 1.9% 1.5% 0.9% 1.6% 1.2% 1.2% T A CT 1.3% 0.2% 1.4% 0.4%
3 CTCT 130.798608 59.782772 106.257716 2.581004 1.833221 1.843921 0.7% 0.3% 1.0% 1.4% 1.0% 1.1% C T TC 1.2% 0.2% 0.7% 0.3%
4 ACTC 125.563298 131.469449 52.947096 0.831735 0.660327 0.881032 0.7% 0.7% 0.5% 0.4% 0.4% 0.5% A C CT 0.4% 0.1% 0.6% 0.1%
5 GTCA 23.187486 103.684129 22.415362 0.727549 0.589369 0.627018 0.1% 0.6% 0.2% 0.4% 0.3% 0.4% G A TC 0.4% 0.0% 0.3% 0.2%
6 CTCG 236.748271 173.913425 114.360359 2.860291 2.784059 2.422837 1.3% 1.0% 1.1% 1.5% 1.6% 1.4% C G TC 1.5% 0.0% 1.1% 0.2%
7 TCTC 81.263133 55.439150 51.343966 1.239722 0.371486 0.919851 0.5% 0.3% 0.5% 0.7% 0.2% 0.5% T C CT 0.5% 0.2% 0.4% 0.1%
8 GTCT 23.700021 26.111412 38.491997 0.825606 0.503385 0.764573 0.1% 0.1% 0.4% 0.4% 0.3% 0.5% G T TC 0.4% 0.1% 0.2% 0.1%
9 TTTG 769.420615 444.117118 329.288253 7.933872 7.128359 6.810273 4.4% 2.4% 3.1% 4.3% 4.0% 4.0% T G TT 4.1% 0.1% 3.3% 0.8%
10 TTTT 305.394881 176.411181 149.667480 3.642997 2.673865 2.498788 1.7% 1.0% 1.4% 2.0% 1.5% 1.5% T T TT 1.6% 0.2% 1.4% 0.3%
11 ATTT 170.885227 82.452349 79.520419 1.891977 1.652070 1.701302 1.0% 0.5% 0.7% 1.0% 0.9% 1.0% A T TT 1.0% 0.0% 0.7% 0.2%
12 ACTA 185.079447 365.387286 113.119233 2.154630 1.833221 1.530835 1.1% 2.0% 1.1% 1.2% 1.0% 0.9% A A CT 1.0% 0.1% 1.4% 0.4%
13 ATCA 211.335967 247.605688 131.501337 2.404151 2.107036 1.947721 1.2% 1.4% 1.2% 1.3% 1.2% 1.2% A A TC 1.2% 0.1% 1.3% 0.1%
14 CCTA 408.136120 331.111103 136.783346 3.014381 2.389198 2.070931 2.3% 1.8% 1.3% 1.6% 1.3% 1.2% C A CT 1.4% 0.2% 1.8% 0.4%
15 CCCT 121.031183 83.542187 73.936608 1.291377 1.125311 0.892846 0.7% 0.5% 0.7% 0.7% 0.6% 0.5% C T CC 0.6% 0.1% 0.6% 0.1%
16 GCCC 43.997773 98.264397 32.171432 0.668890 0.668675 0.535876 0.3% 0.5% 0.3% 0.4% 0.4% 0.3% G C CC 0.3% 0.0% 0.4% 0.1%
17 CCCG 133.905628 298.555297 73.401759 1.260735 1.057692 0.780607 0.8% 1.6% 0.7% 0.7% 0.6% 0.5% C G CC 0.6% 0.1% 1.0% 0.4%
18 TCCC 110.764596 42.429475 69.186004 1.279996 1.071883 0.891158 0.6% 0.2% 0.6% 0.7% 0.6% 0.5% T C CC 0.6% 0.1% 0.5% 0.2%
19 GTCC 47.862787 68.585267 35.232019 0.655757 0.675353 0.456550 0.3% 0.4% 0.3% 0.4% 0.4% 0.3% G C TC 0.3% 0.0% 0.3% 0.0%
20 GCCG 36.950919 83.015128 32.329862 0.548945 0.400704 0.389038 0.2% 0.5% 0.3% 0.3% 0.2% 0.2% G G CC 0.2% 0.0% 0.3% 0.1%
21 ACTG 36.922960 87.464840 54.767960 0.866000 1.893000 0.886000 0.2% 0.5% 0.5% 0.5% 1.1% 0.5% A G CT 0.7% 0.3% 0.4% 0.1%
22 ACCA 125.111611 182.020200 116.074520 1.830000 2.476000 2.271000 0.7% 1.0% 1.1% 1.0% 1.4% 1.3% A A CC 1.2% 0.2% 0.9% 0.2%
23 ATTA 699.430689 854.874310 462.002960 8.041000 7.779000 8.723000 4.0% 4.7% 4.3% 4.3% 4.3% 5.2% A A TT 4.6% 0.4% 4.3% 0.3%
24 TTTA 1221.814837 1455.408670 732.633450 11.376000 11.790000 13.070000 6.9% 8.0% 6.8% 6.1% 6.6% 7.8% T A TT 6.8% 0.7% 7.3% 0.5%
25 CTTT 753.245881 243.846290 392.499360 4.882000 4.113000 4.726000 4.3% 1.3% 3.7% 2.6% 2.3% 2.8% C T TT 2.6% 0.2% 3.1% 1.3%
26 GCTT 103.137874 243.939680 63.662610 1.677000 1.548000 1.594000 0.6% 1.3% 0.6% 0.9% 0.9% 0.9% G T CT 0.9% 0.0% 0.8% 0.4%
27 CCTG 47.205195 26.472900 118.930140 1.081000 1.162000 0.928000 0.3% 0.1% 1.1% 0.6% 0.6% 0.6% C G CT 0.6% 0.0% 0.5% 0.4%
28 TCCT 80.279850 107.616230 141.106560 1.458000 1.586000 1.029000 0.5% 0.6% 1.3% 0.8% 0.9% 0.6% T T CC 0.8% 0.1% 0.8% 0.4%
29 ATCC 185.113670 120.839130 44.421460 2.097000 2.355000 1.181000 1.1% 0.7% 0.4% 1.1% 1.3% 0.7% A C TC 1.0% 0.3% 0.7% 0.3%
30 CTCA 371.911240 355.358180 237.439440 4.104000 4.015000 4.152000 2.1% 2.0% 2.2% 2.2% 2.2% 2.5% C A TC 2.3% 0.1% 2.1% 0.1%
31 CCTC 116.738070 99.357680 79.144030 1.571000 1.321000 1.642000 0.7% 0.5% 0.7% 0.8% 0.7% 1.0% C C CT 0.9% 0.1% 0.6% 0.1%
32 GCCT 42.213710 70.557650 31.261790 1.183000 1.228000 0.668000 0.2% 0.4% 0.3% 0.6% 0.7% 0.4% G T CC 0.6% 0.1% 0.3% 0.1%
33 CTTG 316.914410 360.922280 209.467830 3.943000 3.622000 4.150000 1.8% 2.0% 2.0% 2.1% 2.0% 2.5% C G TT 2.2% 0.2% 1.9% 0.1%
34 CCTT 120.244480 47.647340 69.895410 1.262000 1.437000 1.636000 0.7% 0.3% 0.7% 0.7% 0.8% 1.0% C T CT 0.8% 0.1% 0.5% 0.2%
35 ACCT 97.808210 192.458060 72.412710 1.529000 1.751000 1.113000 0.6% 1.1% 0.7% 0.8% 1.0% 0.7% A T CC 0.8% 0.1% 0.8% 0.2%
36 TCCA 252.335580 314.542770 144.528140 2.885000 2.928000 2.869000 1.4% 1.7% 1.3% 1.5% 1.6% 1.7% T A CC 1.6% 0.1% 1.5% 0.2%
37 CTCC 286.859710 247.767490 187.988090 3.397000 3.156000 3.260000 1.6% 1.4% 1.8% 1.8% 1.8% 1.9% C C TC 1.8% 0.1% 1.6% 0.2%
38 GCTC 76.071590 80.994960 51.542590 1.283000 1.299000 0.848000 0.4% 0.4% 0.5% 0.7% 0.7% 0.5% G C CT 0.6% 0.1% 0.5% 0.0%
39 TTCG 376.739770 384.250640 215.279950 4.061000 3.937000 3.773000 2.1% 2.1% 2.0% 2.2% 2.2% 2.2% T G TC 2.2% 0.0% 2.1% 0.1%
40 CTTC 416.146700 413.345800 255.910860 4.396000 4.330000 4.070000 2.4% 2.3% 2.4% 2.4% 2.4% 2.4% C C TT 2.4% 0.0% 2.3% 0.1%
41 ACTT 141.463350 86.564890 156.516780 2.319000 1.622000 1.779000 0.8% 0.5% 1.5% 1.2% 0.9% 1.1% A T CT 1.1% 0.1% 0.9% 0.4%
42 TTCA 669.675970 725.908680 317.528440 6.613000 6.604000 5.956000 3.8% 4.0% 3.0% 3.5% 3.7% 3.5% T A TC 3.6% 0.1% 3.6% 0.4%
43 TTTC 671.141170 577.954420 419.976290 7.005000 6.669000 6.053000 3.8% 3.2% 3.9% 3.8% 3.7% 3.6% T C TT 3.7% 0.1% 3.6% 0.3%
44 GTTT 208.506950 214.975070 147.349120 2.354000 2.173000 2.952000 1.2% 1.2% 1.4% 1.3% 1.2% 1.8% G T TT 1.4% 0.2% 1.2% 0.1%
45 ATTG 600.380484 561.823916 344.453215 6.442953 7.160215 5.833914 3.4% 3.1% 3.2% 3.5% 4.0% 3.5% A G TT 3.6% 0.2% 3.2% 0.1%
46 CCCA 336.107133 260.667568 189.392034 3.202855 3.361277 2.707637 1.9% 1.4% 1.8% 1.7% 1.9% 1.6% C A CC 1.7% 0.1% 1.7% 0.2%
47 CCCC 80.480019 96.278561 55.080170 1.458497 1.645912 1.436801 0.5% 0.5% 0.5% 0.8% 0.9% 0.9% C C CC 0.9% 0.1% 0.5% 0.0%
48 ACCC 114.872718 53.642936 73.784829 1.611493 1.622762 1.353819 0.7% 0.3% 0.7% 0.9% 0.9% 0.8% A C CC 0.9% 0.0% 0.5% 0.2%
49 CTTA 1043.576232 1110.221141 341.847614 9.767601 9.723340 9.169976 5.9% 6.1% 3.2% 5.2% 5.4% 5.4% C A TT 5.4% 0.1% 5.1% 1.3%
50 TCTT 227.532688 210.764274 585.948346 2.724742 2.788019 2.319766 1.3% 1.2% 5.5% 1.5% 1.6% 1.4% T T CT 1.5% 0.1% 2.6% 2.0%
51 ATCT 230.452602 251.337548 82.279615 2.359363 2.428630 2.080144 1.3% 1.4% 0.8% 1.3% 1.4% 1.2% A T TC 1.3% 0.0% 1.2% 0.3%
52 GCCA 99.627768 117.189014 80.787207 1.368914 1.591894 1.215826 0.6% 0.6% 0.8% 0.7% 0.9% 0.7% G A CC 0.8% 0.1% 0.7% 0.1%
53 TCTG 111.309228 230.847188 125.666229 1.137406 1.545592 1.574793 0.6% 1.3% 1.2% 0.6% 0.9% 0.9% T G CT 0.8% 0.1% 1.0% 0.3%
54 TTCT 442.727221 605.803031 340.806834 3.874227 4.205730 4.303875 2.5% 3.3% 3.2% 2.1% 2.3% 2.6% T T TC 2.3% 0.2% 3.0% 0.4%
55 ATTC 617.022651 716.644004 382.722673 5.809830 5.509893 5.052578 3.5% 3.9% 3.6% 3.1% 3.1% 3.0% A C TT 3.1% 0.0% 3.7% 0.2%
56 GCTA 180.445337 265.994364 134.113870 1.908427 1.907186 1.661505 1.0% 1.5% 1.3% 1.0% 1.1% 1.0% G A CT 1.0% 0.0% 1.2% 0.2%
57 ATCG 238.753727 293.573227 154.866789 2.424789 2.258858 2.164058 1.4% 1.6% 1.4% 1.3% 1.3% 1.3% A G TC 1.3% 0.0% 1.5% 0.1%
58 GTTA 624.090048 742.415526 329.428406 5.548126 5.013804 4.566807 3.5% 4.1% 3.1% 3.0% 2.8% 2.7% G A TT 2.8% 0.1% 3.6% 0.4%
59 TCCG 336.147336 460.805400 216.345450 2.847541 3.446163 2.783160 1.9% 2.5% 2.0% 1.5% 1.9% 1.7% T G CC 1.7% 0.2% 2.2% 0.3%
60 TTCC 638.424152 704.685437 351.999854 5.583355 5.523122 4.622749 3.6% 3.9% 3.3% 3.0% 3.1% 2.7% T C TC 2.9% 0.1% 3.6% 0.2%
61 GTTC 389.750931 427.242910 205.690160 3.450467 3.355765 2.754256 2.2% 2.3% 1.9% 1.9% 1.9% 1.6% G C TT 1.8% 0.1% 2.2% 0.2%
62 GTCG 105.191819 105.852666 50.856442 0.878722 0.892960 0.680639 0.6% 0.6% 0.5% 0.5% 0.5% 0.4% G G TC 0.5% 0.0% 0.6% 0.1%
63 GCTG 114.301470 152.993135 72.632126 0.900866 1.155336 0.586468 0.7% 0.8% 0.7% 0.5% 0.6% 0.3% G G CT 0.5% 0.1% 0.7% 0.1%
/Users/Chen/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:3: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  app.launch_new_instance()
/Users/Chen/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:3: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) instead
  app.launch_new_instance()
/Users/Chen/Documents/research/all data/data analysis/BB UVB.csv
4mers origin1 origin2 origin3 quantity1 quantity2 quantity3 normalized origin1 normalized origin2 normalized origin3 normalized quantity1 normalized quantity2 normalized quantity3 5end 3end CPD quanti mean quanti std origin mean origin std
0 GTTG 73.026749 33.155274 58.152609 1.184548 0.821710 1.056279 0.6% 0.4% 0.9% 0.7% 0.5% 0.6% G G TT 0.6% 0.1% 0.6% 0.2%
1 ACCG 165.392205 70.640132 103.462375 1.936628 1.735711 2.456113 1.3% 0.8% 1.6% 1.1% 1.0% 1.5% A G CC 1.2% 0.2% 1.2% 0.3%
2 TCTA 343.394311 100.765022 101.036230 3.054364 2.323456 1.350377 2.7% 1.1% 1.6% 1.7% 1.4% 0.8% T A CT 1.3% 0.4% 1.8% 0.6%
3 CTCT 125.986767 135.625061 154.791853 3.416410 2.686142 3.301313 1.0% 1.5% 2.4% 1.9% 1.6% 2.0% C T TC 1.8% 0.2% 1.6% 0.6%
4 ACTC 157.423875 47.301912 35.394586 1.236011 1.007101 1.037732 1.2% 0.5% 0.5% 0.7% 0.6% 0.6% A C CT 0.6% 0.0% 0.8% 0.3%
5 GTCA 26.752503 27.307255 52.788823 0.918205 1.518747 0.894658 0.2% 0.3% 0.8% 0.5% 0.9% 0.5% G A TC 0.6% 0.2% 0.4% 0.3%
6 CTCG 236.463240 115.736288 153.147165 3.518433 2.820530 3.343706 1.8% 1.3% 2.4% 2.0% 1.6% 2.0% C G TC 1.9% 0.2% 1.8% 0.4%
7 TCTC 94.599622 48.625199 66.595162 1.692856 0.886475 1.215251 0.7% 0.6% 1.0% 0.9% 0.5% 0.7% T C CT 0.7% 0.2% 0.8% 0.2%
8 GTCT 43.821355 36.885586 37.856281 0.936262 0.981195 1.171092 0.3% 0.4% 0.6% 0.5% 0.6% 0.7% G T TC 0.6% 0.1% 0.4% 0.1%
9 TTTG 450.247040 202.043801 200.167979 5.985037 4.780491 5.221339 3.5% 2.3% 3.1% 3.4% 2.8% 3.1% T G TT 3.1% 0.2% 2.9% 0.5%
10 TTTT 229.144686 124.560535 109.285509 3.150970 2.429509 2.576225 1.8% 1.4% 1.7% 1.8% 1.4% 1.5% T T TT 1.6% 0.1% 1.6% 0.2%
11 ATTT 96.372474 60.677181 53.805227 1.468045 1.322022 1.221433 0.7% 0.7% 0.8% 0.8% 0.8% 0.7% A T TT 0.8% 0.0% 0.8% 0.1%
12 ACTA 151.065352 119.759067 77.456923 2.119005 1.907339 1.868801 1.2% 1.4% 1.2% 1.2% 1.1% 1.1% A A CT 1.1% 0.0% 1.2% 0.1%
13 ATCA 198.162669 145.677284 100.156801 2.631827 2.350982 2.468478 1.5% 1.7% 1.5% 1.5% 1.4% 1.5% A A TC 1.4% 0.0% 1.6% 0.1%
14 CCTA 238.090920 137.678062 100.070422 2.713987 2.499132 2.595655 1.8% 1.6% 1.5% 1.5% 1.5% 1.5% C A CT 1.5% 0.0% 1.6% 0.1%
15 CCCT 201.064115 135.115213 97.705654 2.575850 2.124303 2.361614 1.6% 1.5% 1.5% 1.4% 1.2% 1.4% C T CC 1.4% 0.1% 1.5% 0.0%
16 GCCC 96.688142 62.731736 44.336377 1.166491 1.078343 0.970611 0.7% 0.7% 0.7% 0.7% 0.6% 0.6% G C CC 0.6% 0.0% 0.7% 0.0%
17 CCCG 178.029268 103.875318 74.346154 1.758765 1.742997 1.751339 1.4% 1.2% 1.1% 1.0% 1.0% 1.0% C G CC 1.0% 0.0% 1.2% 0.1%
18 TCCC 154.220551 107.304977 78.592311 2.092822 1.720329 1.853787 1.2% 1.2% 1.2% 1.2% 1.0% 1.1% T C CC 1.1% 0.1% 1.2% 0.0%
19 GTCC 62.475314 47.977591 33.272441 0.805348 0.705133 0.673864 0.5% 0.5% 0.5% 0.5% 0.4% 0.4% G C TC 0.4% 0.0% 0.5% 0.0%
20 GCCG 77.090152 60.490186 40.176155 0.947097 0.880809 0.877877 0.6% 0.7% 0.6% 0.5% 0.5% 0.5% G G CC 0.5% 0.0% 0.6% 0.0%
21 ACTG 59.390330 67.964500 47.340260 1.567000 1.583000 1.599000 0.5% 0.8% 0.7% 0.9% 0.9% 0.9% A G CT 0.9% 0.0% 0.7% 0.1%
22 ACCA 177.123425 107.700280 87.820120 2.708000 2.371000 2.789000 1.4% 1.2% 1.3% 1.5% 1.4% 1.7% A A CC 1.5% 0.1% 1.3% 0.1%
23 ATTA 336.049589 217.898770 191.469090 5.661000 5.162000 5.767000 2.6% 2.5% 2.9% 3.2% 3.0% 3.4% A A TT 3.2% 0.2% 2.7% 0.2%
24 TTTA 794.706618 436.563370 314.749870 8.982000 8.730000 9.265000 6.1% 4.9% 4.8% 5.0% 5.1% 5.5% T A TT 5.2% 0.2% 5.3% 0.6%
25 CTTT 465.865024 282.249090 209.621060 4.848000 3.422000 4.462000 3.6% 3.2% 3.2% 2.7% 2.0% 2.6% C T TT 2.5% 0.3% 3.3% 0.2%
26 GCTT 114.430543 77.789540 46.080370 1.945000 2.180000 2.186000 0.9% 0.9% 0.7% 1.1% 1.3% 1.3% G T CT 1.2% 0.1% 0.8% 0.1%
27 CCTG 66.046202 59.113690 122.389940 1.236000 1.219000 1.989000 0.5% 0.7% 1.9% 0.7% 0.7% 1.2% C G CT 0.9% 0.2% 1.0% 0.6%
28 TCCT 194.864860 151.447000 104.958270 3.474000 2.821000 2.545000 1.5% 1.7% 1.6% 1.9% 1.6% 1.5% T T CC 1.7% 0.2% 1.6% 0.1%
29 ATCC 205.230440 146.316040 26.656200 2.724000 2.611000 2.950000 1.6% 1.7% 0.4% 1.5% 1.5% 1.7% A C TC 1.6% 0.1% 1.2% 0.6%
30 CTCA 361.790840 232.218830 171.564470 5.097000 4.760000 5.498000 2.8% 2.6% 2.6% 2.9% 2.8% 3.3% C A TC 3.0% 0.2% 2.7% 0.1%
31 CCTC 101.918590 83.840130 53.011630 1.641000 1.456000 1.651000 0.8% 0.9% 0.8% 0.9% 0.9% 1.0% C C CT 0.9% 0.1% 0.9% 0.1%
32 GCCT 95.400500 81.367360 49.065760 1.948000 2.124000 1.361000 0.7% 0.9% 0.8% 1.1% 1.2% 0.8% G T CC 1.0% 0.2% 0.8% 0.1%
33 CTTG 191.976840 138.672110 98.956450 3.174000 2.999000 3.056000 1.5% 1.6% 1.5% 1.8% 1.8% 1.8% C G TT 1.8% 0.0% 1.5% 0.0%
34 CCTT 122.905910 87.938510 55.555710 1.697000 1.412000 1.634000 0.9% 1.0% 0.9% 1.0% 0.8% 1.0% C T CT 0.9% 0.1% 0.9% 0.1%
35 ACCT 186.382510 151.666680 96.173690 3.051000 3.197000 2.938000 1.4% 1.7% 1.5% 1.7% 1.9% 1.7% A T CC 1.8% 0.1% 1.5% 0.1%
36 TCCA 269.919570 175.992920 116.648620 3.880000 3.910000 3.589000 2.1% 2.0% 1.8% 2.2% 2.3% 2.1% T A CC 2.2% 0.1% 2.0% 0.1%
37 CTCC 275.130220 189.738970 138.358370 3.878000 3.627000 3.477000 2.1% 2.1% 2.1% 2.2% 2.1% 2.1% C C TC 2.1% 0.0% 2.1% 0.0%
38 GCTC 72.491180 67.990160 33.839230 1.530000 1.648000 1.106000 0.6% 0.8% 0.5% 0.9% 1.0% 0.7% G C CT 0.8% 0.1% 0.6% 0.1%
39 TTCG 257.791400 227.278840 148.817130 4.630000 4.608000 4.313000 2.0% 2.6% 2.3% 2.6% 2.7% 2.6% T G TC 2.6% 0.1% 2.3% 0.2%
40 CTTC 156.862000 170.879580 130.168260 3.327000 3.130000 2.838000 1.2% 1.9% 2.0% 1.9% 1.8% 1.7% C C TT 1.8% 0.1% 1.7% 0.4%
41 ACTT 75.240950 133.540380 83.580440 2.398000 2.656000 2.331000 0.6% 1.5% 1.3% 1.3% 1.6% 1.4% A T CT 1.4% 0.1% 1.1% 0.4%
42 TTCA 493.625720 331.527890 190.403650 7.108000 7.082000 6.478000 3.8% 3.8% 2.9% 4.0% 4.1% 3.8% T A TC 4.0% 0.1% 3.5% 0.4%
43 TTTC 378.404450 259.597500 182.531700 4.965000 4.508000 4.391000 2.9% 2.9% 2.8% 2.8% 2.6% 2.6% T C TT 2.7% 0.1% 2.9% 0.1%
44 GTTT 98.352020 99.378990 92.050770 1.853000 2.038000 2.700000 0.8% 1.1% 1.4% 1.0% 1.2% 1.6% G T TT 1.3% 0.2% 1.1% 0.3%
45 ATTG 255.960948 193.344884 122.887681 3.624079 3.585279 3.906897 2.0% 2.2% 1.9% 2.0% 2.1% 2.3% A G TT 2.1% 0.1% 2.0% 0.1%
46 CCCA 308.709258 198.839728 121.487361 3.700739 3.979266 3.254357 2.4% 2.3% 1.9% 2.1% 2.3% 1.9% C A CC 2.1% 0.2% 2.2% 0.2%
47 CCCC 72.916029 114.635123 69.744088 1.758967 1.576932 1.304246 0.6% 1.3% 1.1% 1.0% 0.9% 0.8% C C CC 0.9% 0.1% 1.0% 0.3%
48 ACCC 156.059323 105.223264 68.401488 1.861742 2.632816 1.658888 1.2% 1.2% 1.1% 1.0% 1.5% 1.0% A C CC 1.2% 0.2% 1.1% 0.1%
49 CTTA 446.280186 306.304683 207.098117 5.723383 5.774860 5.407239 3.4% 3.5% 3.2% 3.2% 3.4% 3.2% C A TT 3.3% 0.1% 3.4% 0.1%
50 TCTT 186.426749 129.225876 100.920556 2.223139 2.026077 2.197943 1.4% 1.5% 1.6% 1.2% 1.2% 1.3% T T CT 1.2% 0.0% 1.5% 0.0%
51 ATCT 208.950013 158.456423 107.726667 2.521355 2.919441 2.537564 1.6% 1.8% 1.7% 1.4% 1.7% 1.5% A T TC 1.5% 0.1% 1.7% 0.1%
52 GCCA 120.631189 106.233523 71.078207 1.740434 2.317627 1.739829 0.9% 1.2% 1.1% 1.0% 1.4% 1.0% G A CC 1.1% 0.2% 1.1% 0.1%
53 TCTG 164.138137 99.182893 74.275016 1.097670 1.689218 1.389360 1.3% 1.1% 1.1% 0.6% 1.0% 0.8% T G CT 0.8% 0.2% 1.2% 0.1%
54 TTCT 267.939215 245.021097 179.291777 3.799302 4.287560 4.382534 2.1% 2.8% 2.8% 2.1% 2.5% 2.6% T T TC 2.4% 0.2% 2.5% 0.3%
55 ATTC 256.610328 179.725133 122.310228 3.045338 3.157803 2.925584 2.0% 2.0% 1.9% 1.7% 1.8% 1.7% A C TT 1.8% 0.1% 2.0% 0.1%
56 GCTA 112.231723 103.360704 61.914428 1.480970 1.877347 1.473640 0.9% 1.2% 1.0% 0.8% 1.1% 0.9% G A CT 0.9% 0.1% 1.0% 0.1%
57 ATCG 167.693233 125.284581 83.663979 2.203764 2.250649 2.061093 1.3% 1.4% 1.3% 1.2% 1.3% 1.2% A G TC 1.3% 0.0% 1.3% 0.1%
58 GTTA 242.894870 167.730380 103.223732 2.788401 3.021878 2.520875 1.9% 1.9% 1.6% 1.6% 1.8% 1.5% G A TT 1.6% 0.1% 1.8% 0.1%
59 TCCG 279.017488 168.366399 121.516611 3.244991 3.222811 3.084964 2.2% 1.9% 1.9% 1.8% 1.9% 1.8% T G CC 1.8% 0.0% 2.0% 0.1%
60 TTCC 444.816494 258.385027 179.879294 5.262581 4.579110 4.404230 3.4% 2.9% 2.8% 2.9% 2.7% 2.6% T C TC 2.7% 0.1% 3.0% 0.3%
61 GTTC 173.606280 115.841376 71.660205 2.063922 2.115709 1.814095 1.3% 1.3% 1.1% 1.2% 1.2% 1.1% G C TT 1.2% 0.1% 1.3% 0.1%
62 GTCG 64.914408 62.017413 34.010131 0.824727 0.937688 0.786052 0.5% 0.7% 0.5% 0.5% 0.5% 0.5% G G TC 0.5% 0.0% 0.6% 0.1%
63 GCTG 73.459724 85.678981 41.132565 0.836520 1.487300 0.935419 0.6% 1.0% 0.6% 0.5% 0.9% 0.6% G G CT 0.6% 0.2% 0.7% 0.2%
/Users/Chen/Documents/research/all data/data analysis/FiltBB UVB.csv
4mers origin1 origin2 origin3 quantity1 quantity2 quantity3 normalized origin1 normalized origin2 normalized origin3 normalized quantity1 normalized quantity2 normalized quantity3 5end 3end CPD quanti mean quanti std origin mean origin std
0 GTTG 40.486650 46.032510 32.584272 1.782591 0.945147 1.142673 0.5% 0.6% 0.8% 1.0% 0.6% 0.8% G G TT 0.8% 0.2% 0.6% 0.1%
1 ACCG 135.603224 98.185606 68.417689 2.910705 2.140171 2.462246 1.7% 1.2% 1.7% 1.6% 1.3% 1.6% A G CC 1.5% 0.1% 1.6% 0.2%
2 TCTA 247.800247 114.853924 60.224766 3.945167 2.677625 1.428581 3.2% 1.5% 1.5% 2.2% 1.6% 0.9% T A CT 1.6% 0.5% 2.1% 0.8%
3 CTCT 82.591886 155.427762 101.462350 3.897804 2.534713 4.315385 1.1% 2.0% 2.6% 2.2% 1.6% 2.9% C T TC 2.2% 0.5% 1.9% 0.6%
4 ACTC 96.634133 22.601075 20.076195 2.034479 1.551865 1.025059 1.2% 0.3% 0.5% 1.1% 1.0% 0.7% A C CT 0.9% 0.2% 0.7% 0.4%
5 GTCA 22.185956 50.860292 30.254991 1.414447 0.899556 1.134067 0.3% 0.6% 0.8% 0.8% 0.6% 0.8% G A TC 0.7% 0.1% 0.6% 0.2%
6 CTCG 149.465237 154.517188 94.697113 4.112016 2.746889 3.082827 1.9% 2.0% 2.4% 2.3% 1.7% 2.0% C G TC 2.0% 0.2% 2.1% 0.2%
7 TCTC 73.827738 78.307796 43.240351 1.902077 1.488738 1.322441 1.0% 1.0% 1.1% 1.1% 0.9% 0.9% T C CT 0.9% 0.1% 1.0% 0.1%
8 GTCT 50.454380 34.525172 18.180339 1.546850 1.088059 1.504121 0.6% 0.4% 0.5% 0.9% 0.7% 1.0% G T TC 0.8% 0.1% 0.5% 0.1%
9 TTTG 176.319745 167.970120 93.175590 4.679302 3.471969 3.476787 2.3% 2.1% 2.4% 2.6% 2.1% 2.3% T G TT 2.3% 0.2% 2.3% 0.1%
10 TTTT 94.271823 87.085689 44.714226 2.348801 1.891171 1.513684 1.2% 1.1% 1.1% 1.3% 1.2% 1.0% T T TT 1.2% 0.1% 1.2% 0.0%
11 ATTT 37.814074 33.820911 21.322729 1.221764 0.922351 1.200046 0.5% 0.4% 0.5% 0.7% 0.6% 0.8% A T TT 0.7% 0.1% 0.5% 0.0%
12 ACTA 109.485951 91.508637 44.829834 2.378941 1.927118 1.659984 1.4% 1.2% 1.1% 1.3% 1.2% 1.1% A A CT 1.2% 0.1% 1.2% 0.1%
13 ATCA 145.496529 117.424420 62.982366 3.053872 2.390047 2.336026 1.9% 1.5% 1.6% 1.7% 1.5% 1.5% A A TC 1.6% 0.1% 1.7% 0.2%
14 CCTA 172.165080 113.236731 60.907761 3.556571 2.375142 2.211718 2.2% 1.4% 1.6% 2.0% 1.5% 1.5% C A CT 1.6% 0.2% 1.7% 0.3%
15 CCCT 126.076499 119.001418 62.704516 2.866570 2.305002 2.118966 1.6% 1.5% 1.6% 1.6% 1.4% 1.4% C T CC 1.5% 0.1% 1.6% 0.1%
16 GCCC 84.471114 63.287867 35.610567 1.669565 1.314263 1.461092 1.1% 0.8% 0.9% 0.9% 0.8% 1.0% G C CC 0.9% 0.1% 0.9% 0.1%
17 CCCG 157.902161 88.409187 51.273321 1.995727 1.601840 1.554801 2.0% 1.1% 1.3% 1.1% 1.0% 1.0% C G CC 1.0% 0.1% 1.5% 0.4%
18 TCCC 89.635525 102.306389 57.462346 2.516726 1.856977 1.936329 1.2% 1.3% 1.5% 1.4% 1.1% 1.3% T C CC 1.3% 0.1% 1.3% 0.1%
19 GTCC 58.430762 50.757149 28.890274 1.175477 0.915337 0.737239 0.8% 0.6% 0.7% 0.7% 0.6% 0.5% G C TC 0.6% 0.1% 0.7% 0.0%
20 GCCG 66.122491 62.323126 35.135144 1.317567 1.077538 1.013585 0.9% 0.8% 0.9% 0.7% 0.7% 0.7% G G CC 0.7% 0.0% 0.8% 0.0%
21 ACTG 77.904190 91.967370 47.553060 2.078000 1.948000 1.501000 1.0% 1.2% 1.2% 1.2% 1.2% 1.0% A G CT 1.1% 0.1% 1.1% 0.1%
22 ACCA 138.199990 121.139990 71.782470 3.758000 3.768000 3.206000 1.8% 1.5% 1.8% 2.1% 2.3% 2.1% A A CC 2.2% 0.1% 1.7% 0.1%
23 ATTA 263.179710 168.197090 112.168170 5.900000 3.391000 4.858000 3.4% 2.1% 2.9% 3.3% 2.1% 3.2% A A TT 2.9% 0.5% 2.8% 0.5%
24 TTTA 388.483500 273.625850 162.425900 7.360000 6.383000 6.530000 5.0% 3.5% 4.1% 4.1% 3.9% 4.3% T A TT 4.1% 0.2% 4.2% 0.6%
25 CTTT 141.914850 191.620670 119.810730 3.917000 3.455000 4.444000 1.8% 2.4% 3.1% 2.2% 2.1% 2.9% C T TT 2.4% 0.4% 2.4% 0.5%
26 GCTT 94.147610 66.013230 31.302250 2.757000 1.716000 2.024000 1.2% 0.8% 0.8% 1.5% 1.1% 1.3% G T CT 1.3% 0.2% 0.9% 0.2%
27 CCTG 117.182540 57.572690 82.292360 1.479000 1.437000 0.944000 1.5% 0.7% 2.1% 0.8% 0.9% 0.6% C G CT 0.8% 0.1% 1.4% 0.6%
28 TCCT 71.653850 135.221120 78.493070 3.560000 2.767000 3.026000 0.9% 1.7% 2.0% 2.0% 1.7% 2.0% T T CC 1.9% 0.1% 1.5% 0.5%
29 ATCC 203.098830 142.238090 21.254700 2.971000 2.933000 1.123000 2.6% 1.8% 0.5% 1.6% 1.8% 0.7% A C TC 1.4% 0.5% 1.7% 0.9%
30 CTCA 247.256040 202.549630 112.960840 5.106000 4.642000 4.940000 3.2% 2.6% 2.9% 2.8% 2.9% 3.3% C A TC 3.0% 0.2% 2.9% 0.3%
31 CCTC 43.887530 74.235420 39.680110 1.900000 1.654000 2.011000 0.6% 0.9% 1.0% 1.1% 1.0% 1.3% C C CT 1.1% 0.1% 0.8% 0.2%
32 GCCT 110.450780 76.867300 29.919360 1.682000 1.822000 1.547000 1.4% 1.0% 0.8% 0.9% 1.1% 1.0% G T CC 1.0% 0.1% 1.1% 0.3%
33 CTTG 111.853020 85.973460 50.354140 2.380000 2.186000 2.454000 1.4% 1.1% 1.3% 1.3% 1.3% 1.6% C G TT 1.4% 0.1% 1.3% 0.1%
34 CCTT 53.477080 72.223530 39.745190 1.726000 1.492000 2.209000 0.7% 0.9% 1.0% 1.0% 0.9% 1.5% C T CT 1.1% 0.2% 0.9% 0.1%
35 ACCT 180.304380 133.248400 62.900210 3.341000 3.163000 2.733000 2.3% 1.7% 1.6% 1.9% 1.9% 1.8% A T CC 1.9% 0.1% 1.9% 0.3%
36 TCCA 211.807980 162.873940 79.380480 4.333000 4.096000 3.352000 2.7% 2.1% 2.0% 2.4% 2.5% 2.2% T A CC 2.4% 0.1% 2.3% 0.3%
37 CTCC 153.281200 156.188210 85.739660 3.925000 3.353000 3.498000 2.0% 2.0% 2.2% 2.2% 2.1% 2.3% C C TC 2.2% 0.1% 2.0% 0.1%
38 GCTC 47.614070 61.405840 22.717460 1.607000 1.696000 1.179000 0.6% 0.8% 0.6% 0.9% 1.0% 0.8% G C CT 0.9% 0.1% 0.7% 0.1%
39 TTCG 128.267030 175.679870 89.567240 4.420000 4.090000 3.601000 1.7% 2.2% 2.3% 2.5% 2.5% 2.4% T G TC 2.4% 0.1% 2.1% 0.3%
40 CTTC 63.171340 112.586500 69.748500 2.636000 2.562000 1.950000 0.8% 1.4% 1.8% 1.5% 1.6% 1.3% C C TT 1.4% 0.1% 1.3% 0.4%
41 ACTT 46.323060 108.000540 46.373770 2.458000 2.198000 1.733000 0.6% 1.4% 1.2% 1.4% 1.4% 1.1% A T CT 1.3% 0.1% 1.0% 0.3%
42 TTCA 259.959450 280.235080 122.166740 7.007000 6.517000 5.291000 3.3% 3.5% 3.1% 3.9% 4.0% 3.5% T A TC 3.8% 0.2% 3.3% 0.2%
43 TTTC 163.210330 185.217910 92.152580 4.125000 3.789000 4.739000 2.1% 2.3% 2.4% 2.3% 2.3% 3.1% T C TT 2.6% 0.4% 2.3% 0.1%
44 GTTT 43.621640 71.385900 39.923990 1.545000 1.483000 1.440000 0.6% 0.9% 1.0% 0.9% 0.9% 1.0% G T TT 0.9% 0.0% 0.8% 0.2%
45 ATTG 115.781598 187.836782 68.942403 2.839360 3.290458 2.602073 1.5% 2.4% 1.8% 1.6% 2.0% 1.7% A G TT 1.8% 0.2% 1.9% 0.4%
46 CCCA 170.207052 212.387274 81.669584 3.538504 4.077260 3.018794 2.2% 2.7% 2.1% 2.0% 2.5% 2.0% C A CC 2.2% 0.2% 2.3% 0.3%
47 CCCC 29.958789 157.278120 55.261468 1.434228 1.823209 1.788980 0.4% 2.0% 1.4% 0.8% 1.1% 1.2% C C CC 1.0% 0.2% 1.3% 0.7%
48 ACCC 114.276567 36.630414 22.434517 1.853543 1.999019 1.715545 1.5% 0.5% 0.6% 1.0% 1.2% 1.1% A C CC 1.1% 0.1% 0.8% 0.5%
49 CTTA 187.945061 269.605715 95.545604 4.191437 4.416941 3.809767 2.4% 3.4% 2.4% 2.3% 2.7% 2.5% C A TT 2.5% 0.2% 2.8% 0.5%
50 TCTT 86.597126 204.412618 45.210934 2.123958 2.071730 1.767746 1.1% 2.6% 1.2% 1.2% 1.3% 1.2% T T CT 1.2% 0.0% 1.6% 0.7%
51 ATCT 132.147581 65.421903 73.218677 2.896695 3.309993 2.761329 1.7% 0.8% 1.9% 1.6% 2.0% 1.8% A T TC 1.8% 0.2% 1.5% 0.5%
52 GCCA 75.617619 121.213170 46.835221 1.815890 2.230175 1.681925 1.0% 1.5% 1.2% 1.0% 1.4% 1.1% G A CC 1.2% 0.2% 1.2% 0.2%
53 TCTG 49.625646 70.571358 39.782595 0.966136 1.495466 1.191769 0.6% 0.9% 1.0% 0.5% 0.9% 0.8% T G CT 0.7% 0.2% 0.8% 0.2%
54 TTCT 146.360140 208.595291 96.355957 3.238137 3.775562 3.544340 1.9% 2.6% 2.5% 1.8% 2.3% 2.3% T T TC 2.2% 0.3% 2.3% 0.3%
55 ATTC 94.178399 121.051848 51.226719 2.090584 2.245369 1.914616 1.2% 1.5% 1.3% 1.2% 1.4% 1.3% A C TT 1.3% 0.1% 1.4% 0.1%
56 GCTA 70.201337 97.454615 38.411979 1.445353 1.677786 1.394378 0.9% 1.2% 1.0% 0.8% 1.0% 0.9% G A CT 0.9% 0.1% 1.0% 0.1%
57 ATCG 92.790773 128.554705 50.632980 2.231782 2.250795 1.942043 1.2% 1.6% 1.3% 1.2% 1.4% 1.3% A G TC 1.3% 0.1% 1.4% 0.2%
58 GTTA 203.342072 189.612783 70.451854 2.933492 2.973567 2.356110 2.6% 2.4% 1.8% 1.6% 1.8% 1.6% G A TT 1.7% 0.1% 2.3% 0.3%
59 TCCG 168.170328 213.101819 79.705543 3.268944 3.856955 3.045336 2.2% 2.7% 2.0% 1.8% 2.4% 2.0% T G CC 2.1% 0.2% 2.3% 0.3%
60 TTCC 230.667043 268.240322 113.053889 4.674077 4.817396 4.179596 3.0% 3.4% 2.9% 2.6% 3.0% 2.8% T C TC 2.8% 0.2% 3.1% 0.2%
61 GTTC 104.373714 116.973540 49.050451 2.146207 2.534044 1.850913 1.3% 1.5% 1.3% 1.2% 1.6% 1.2% G C TT 1.3% 0.2% 1.4% 0.1%
62 GTCG 53.409603 68.781144 28.329351 1.131294 1.280587 0.703382 0.7% 0.9% 0.7% 0.6% 0.8% 0.5% G G TC 0.6% 0.1% 0.8% 0.1%
63 GCTG 64.706729 100.345658 34.297618 1.265646 1.942586 0.706921 0.8% 1.3% 0.9% 0.7% 1.2% 0.5% G G CT 0.8% 0.3% 1.0% 0.2%
/Users/Chen/Documents/research/all data/data analysis/NB UVB.csv
4mers origin1 origin2 origin3 quantity1 quantity2 quantity3 normalized origin1 normalized origin2 normalized origin3 normalized quantity1 normalized quantity2 normalized quantity3 5end 3end CPD quanti mean quanti std origin mean origin std
0 GTTG 25.330738 32.552377 29.499206 1.754515 0.836927 1.264996 0.4% 0.6% 0.9% 1.0% 0.6% 0.8% G G TT 0.8% 0.2% 0.6% 0.2%
1 ACCG 88.020967 67.662505 57.801266 2.695238 1.835261 2.309552 1.5% 1.3% 1.7% 1.5% 1.4% 1.5% A G CC 1.5% 0.1% 1.5% 0.2%
2 TCTA 162.249970 71.271443 50.110593 3.780294 1.918954 2.212478 2.7% 1.3% 1.4% 2.2% 1.4% 1.5% T A CT 1.7% 0.3% 1.8% 0.6%
3 CTCT 45.851506 97.077087 90.981378 3.985022 2.155087 3.102323 0.8% 1.8% 2.6% 2.3% 1.6% 2.0% C T TC 2.0% 0.3% 1.7% 0.8%
4 ACTC 71.315987 21.351595 19.014187 1.481204 0.993104 1.461166 1.2% 0.4% 0.5% 0.8% 0.7% 1.0% A C CT 0.9% 0.1% 0.7% 0.4%
5 GTCA 18.922937 34.428317 30.425975 1.285689 0.659827 1.201291 0.3% 0.6% 0.9% 0.7% 0.5% 0.8% G A TC 0.7% 0.1% 0.6% 0.2%
6 CTCG 50.694604 96.032208 85.056988 3.923604 2.516012 3.238833 0.9% 1.8% 2.5% 2.3% 1.9% 2.1% C G TC 2.1% 0.2% 1.7% 0.7%
7 TCTC 52.568620 41.711657 37.195671 1.741208 0.804048 1.147698 0.9% 0.8% 1.1% 1.0% 0.6% 0.8% T C CT 0.8% 0.2% 0.9% 0.1%
8 GTCT 163.304129 27.588033 20.264566 1.474038 1.034203 1.524871 2.8% 0.5% 0.6% 0.8% 0.8% 1.0% G T TC 0.9% 0.1% 1.3% 1.0%
9 TTTG 117.232921 110.228439 93.257704 4.811098 3.187048 3.908240 2.0% 2.0% 2.7% 2.8% 2.4% 2.6% T G TT 2.6% 0.2% 2.2% 0.3%
10 TTTT 67.664017 56.301531 46.036946 2.440352 1.531875 2.019341 1.1% 1.0% 1.3% 1.4% 1.1% 1.3% T T TT 1.3% 0.1% 1.2% 0.1%
11 ATTT 26.464673 27.223727 21.849122 1.353249 0.842905 1.257917 0.4% 0.5% 0.6% 0.8% 0.6% 0.8% A T TT 0.7% 0.1% 0.5% 0.1%
12 ACTA 64.026096 51.374090 39.398485 2.196727 1.445194 1.646213 1.1% 1.0% 1.1% 1.3% 1.1% 1.1% A A CT 1.1% 0.1% 1.1% 0.1%
13 ATCA 85.592236 67.252037 56.548662 2.632796 1.828536 2.328765 1.4% 1.2% 1.6% 1.5% 1.4% 1.5% A A TC 1.5% 0.1% 1.4% 0.2%
14 CCTA 102.644344 64.550629 54.383534 3.174301 1.756800 2.116415 1.7% 1.2% 1.6% 1.8% 1.3% 1.4% C A CT 1.5% 0.2% 1.5% 0.2%
15 CCCT 75.769261 64.188821 54.050116 2.361532 1.664140 1.953614 1.3% 1.2% 1.6% 1.4% 1.2% 1.3% C T CC 1.3% 0.0% 1.3% 0.2%
16 GCCC 48.091683 35.719993 31.833919 1.414667 0.988620 1.474311 0.8% 0.7% 0.9% 0.8% 0.7% 1.0% G C CC 0.8% 0.1% 0.8% 0.1%
17 CCCG 106.349066 48.505925 48.175418 1.911132 1.194116 1.617900 1.8% 0.9% 1.4% 1.1% 0.9% 1.1% C G CC 1.0% 0.1% 1.4% 0.4%
18 TCCC 52.342032 54.355977 52.002684 2.152710 1.388402 1.887887 0.9% 1.0% 1.5% 1.2% 1.0% 1.2% T C CC 1.2% 0.1% 1.1% 0.3%
19 GTCC 40.124890 28.864690 26.211041 1.055371 0.692707 0.819062 0.7% 0.5% 0.8% 0.6% 0.5% 0.5% G C TC 0.6% 0.0% 0.7% 0.1%
20 GCCG 51.438897 34.593206 32.286525 1.160805 0.839169 0.916136 0.9% 0.6% 0.9% 0.7% 0.6% 0.6% G G CC 0.6% 0.0% 0.8% 0.1%
21 ACTG 27.465550 74.601010 41.859360 1.903000 1.044000 1.444000 0.5% 1.4% 1.2% 1.1% 0.8% 1.0% A G CT 0.9% 0.1% 1.0% 0.4%
22 ACCA 93.667579 77.872440 65.039750 3.122000 3.198000 2.889000 1.6% 1.4% 1.9% 1.8% 2.4% 1.9% A A CC 2.0% 0.3% 1.6% 0.2%
23 ATTA 134.620158 105.852150 105.529290 5.873000 2.532000 5.108000 2.3% 2.0% 3.0% 3.4% 1.9% 3.4% A A TT 2.9% 0.7% 2.4% 0.5%
24 TTTA 285.863999 168.655910 156.118930 7.827000 5.467000 8.241000 4.8% 3.1% 4.5% 4.5% 4.1% 5.4% T A TT 4.7% 0.6% 4.2% 0.7%
25 CTTT 183.002295 122.884010 114.257070 3.815000 3.192000 3.636000 3.1% 2.3% 3.3% 2.2% 2.4% 2.4% C T TT 2.3% 0.1% 2.9% 0.4%
26 GCTT 54.117350 37.834770 28.664340 2.345000 1.420000 2.760000 0.9% 0.7% 0.8% 1.3% 1.1% 1.8% G T CT 1.4% 0.3% 0.8% 0.1%
27 CCTG 30.007320 35.595130 35.829670 1.813000 3.609000 3.076000 0.5% 0.7% 1.0% 1.0% 2.7% 2.0% C G CT 1.9% 0.7% 0.7% 0.2%
28 TCCT 116.396340 82.258870 63.473480 2.973000 2.472000 3.399000 2.0% 1.5% 1.8% 1.7% 1.9% 2.2% T T CC 1.9% 0.2% 1.8% 0.2%
29 ATCC 97.970280 84.408610 56.474590 3.031000 1.739000 1.610000 1.7% 1.6% 1.6% 1.7% 1.3% 1.1% A C TC 1.4% 0.3% 1.6% 0.0%
30 CTCA 174.943370 133.441580 103.278890 5.200000 3.986000 5.095000 3.0% 2.5% 3.0% 3.0% 3.0% 3.4% C A TC 3.1% 0.2% 2.8% 0.2%
31 CCTC 46.226460 41.802180 36.976580 1.707000 1.464000 1.633000 0.8% 0.8% 1.1% 1.0% 1.1% 1.1% C C CT 1.1% 0.1% 0.9% 0.1%
32 GCCT 37.999180 42.646480 22.844000 1.899000 1.691000 1.271000 0.6% 0.8% 0.7% 1.1% 1.3% 0.8% G T CC 1.1% 0.2% 0.7% 0.1%
33 CTTG 75.502720 53.956910 48.573480 2.681000 1.909000 1.768000 1.3% 1.0% 1.4% 1.5% 1.4% 1.2% C G TT 1.4% 0.2% 1.2% 0.2%
34 CCTT 49.809200 41.320670 37.332990 1.582000 1.477000 1.664000 0.8% 0.8% 1.1% 0.9% 1.1% 1.1% C T CT 1.0% 0.1% 0.9% 0.1%
35 ACCT 98.591210 80.259520 48.013900 3.114000 2.471000 1.431000 1.7% 1.5% 1.4% 1.8% 1.9% 0.9% A T CC 1.5% 0.4% 1.5% 0.1%
36 TCCA 137.533200 102.074510 70.119940 3.868000 3.389000 3.405000 2.3% 1.9% 2.0% 2.2% 2.5% 2.2% T A CC 2.3% 0.1% 2.1% 0.2%
37 CTCC 130.515650 97.884400 77.174680 3.888000 2.993000 3.787000 2.2% 1.8% 2.2% 2.2% 2.2% 2.5% C C TC 2.3% 0.1% 2.1% 0.2%
38 GCTC 25.451250 31.130840 20.857510 1.427000 1.049000 1.336000 0.4% 0.6% 0.6% 0.8% 0.8% 0.9% G C CT 0.8% 0.0% 0.5% 0.1%
39 TTCG 114.222640 113.120770 79.971060 4.411000 3.549000 2.324000 1.9% 2.1% 2.3% 2.5% 2.7% 1.5% T G TC 2.2% 0.5% 2.1% 0.2%
40 CTTC 59.004350 70.106500 65.656320 2.564000 2.104000 2.003000 1.0% 1.3% 1.9% 1.5% 1.6% 1.3% C C TT 1.5% 0.1% 1.4% 0.4%
41 ACTT 38.997490 67.877770 42.343880 2.402000 1.679000 1.818000 0.7% 1.3% 1.2% 1.4% 1.3% 1.2% A T CT 1.3% 0.1% 1.0% 0.3%
42 TTCA 218.981510 173.590080 109.084940 6.528000 5.838000 6.387000 3.7% 3.2% 3.1% 3.7% 4.4% 4.2% T A TC 4.1% 0.3% 3.4% 0.2%
43 TTTC 144.734660 123.165650 86.440580 4.345000 3.147000 3.292000 2.4% 2.3% 2.5% 2.5% 2.4% 2.2% T C TT 2.3% 0.1% 2.4% 0.1%
44 GTTT 44.948630 39.057430 37.248270 1.541000 1.465000 1.444000 0.8% 0.7% 1.1% 0.9% 1.1% 1.0% G T TT 1.0% 0.1% 0.9% 0.2%
45 ATTG 114.126677 104.247500 66.130970 3.051393 2.546122 2.868134 1.9% 1.9% 1.9% 1.8% 1.9% 1.9% A G TT 1.9% 0.1% 1.9% 0.0%
46 CCCA 150.011746 163.909636 67.981976 3.476966 3.012389 2.660514 2.5% 3.0% 2.0% 2.0% 2.3% 1.8% C A CC 2.0% 0.2% 2.5% 0.4%
47 CCCC 43.677904 74.903700 53.670889 1.390089 1.989191 1.615406 0.7% 1.4% 1.5% 0.8% 1.5% 1.1% C C CC 1.1% 0.3% 1.2% 0.4%
48 ACCC 74.647771 77.981517 10.725732 1.784267 2.021571 2.231258 1.3% 1.4% 0.3% 1.0% 1.5% 1.5% A C CC 1.3% 0.2% 1.0% 0.5%
49 CTTA 179.369340 219.294514 87.306911 4.339450 3.690203 3.863309 3.0% 4.1% 2.5% 2.5% 2.8% 2.6% C A TT 2.6% 0.1% 3.2% 0.6%
50 TCTT 79.472533 143.429299 38.173938 2.146179 2.125186 1.958811 1.3% 2.7% 1.1% 1.2% 1.6% 1.3% T T CT 1.4% 0.2% 1.7% 0.7%
51 ATCT 116.363350 119.710926 61.347383 2.801108 2.398254 2.703440 2.0% 2.2% 1.8% 1.6% 1.8% 1.8% A T TC 1.7% 0.1% 2.0% 0.2%
52 GCCA 63.458329 85.878798 39.988031 1.816534 2.511583 1.613654 1.1% 1.6% 1.2% 1.0% 1.9% 1.1% G A CC 1.3% 0.4% 1.3% 0.2%
53 TCTG 45.784430 73.712545 29.566623 0.926145 1.159192 1.111687 0.8% 1.4% 0.9% 0.5% 0.9% 0.7% T G CT 0.7% 0.1% 1.0% 0.3%
54 TTCT 128.475926 152.011295 73.815905 3.143834 2.633547 3.039837 2.2% 2.8% 2.1% 1.8% 2.0% 2.0% T T TC 1.9% 0.1% 2.4% 0.3%
55 ATTC 90.413589 92.965009 45.221743 2.256060 1.465720 2.946977 1.5% 1.7% 1.3% 1.3% 1.1% 1.9% A C TT 1.4% 0.4% 1.5% 0.2%
56 GCTA 60.128581 80.100394 34.145873 1.447646 1.172144 2.401208 1.0% 1.5% 1.0% 0.8% 0.9% 1.6% G A CT 1.1% 0.3% 1.2% 0.2%
57 ATCG 81.054713 100.241101 40.915822 2.221177 1.531559 1.681985 1.4% 1.9% 1.2% 1.3% 1.1% 1.1% A G TC 1.2% 0.1% 1.5% 0.3%
58 GTTA 197.726361 141.527828 59.521099 2.962442 2.055030 2.169059 3.3% 2.6% 1.7% 1.7% 1.5% 1.4% G A TT 1.6% 0.1% 2.6% 0.7%
59 TCCG 150.239553 160.610002 54.844550 3.096741 2.839697 2.292580 2.5% 3.0% 1.6% 1.8% 2.1% 1.5% T G CC 1.8% 0.3% 2.4% 0.6%
60 TTCC 203.838401 214.636336 82.509649 4.455436 3.738773 3.483110 3.4% 4.0% 2.4% 2.6% 2.8% 2.3% T C TC 2.6% 0.2% 3.3% 0.7%
61 GTTC 97.085824 98.333457 38.633978 2.105191 1.124654 1.110811 1.6% 1.8% 1.1% 1.2% 0.8% 0.7% G C TT 0.9% 0.2% 1.5% 0.3%
62 GTCG 46.242822 58.577817 22.084397 1.006376 0.728543 0.637753 0.8% 1.1% 0.6% 0.6% 0.5% 0.4% G G TC 0.5% 0.1% 0.8% 0.2%
63 GCTG 50.887918 103.665326 27.535602 1.241836 1.574731 0.749009 0.9% 1.9% 0.8% 0.7% 1.2% 0.5% G G CT 0.8% 0.3% 1.2% 0.5%
In [ ]:
%matplotlib inline
names = ["UVC","BB UVB","FiltBB UVB","NB UVB"]
for j in range(len(names)):
    filepath = '/Users/Chen/Documents/research/all data/data analysis/'
    filename = filepath + '%s.csv' %(names[j])
    df = construct_df(filename)
    

acetone vs NFX

In [54]:
def transform_df_sensitizer(df, PP, sensitizer):
    names = ['A','C','G','T']
    pypy = df[df['CPD'] == PP].sort(['5end','3end'], ascending=True)[sensitizer +' mean'].reshape(4,4)
    pypy = pd.DataFrame(pypy)
    three_ends = pypy.sum(axis = 0)
    five_ends = pypy.sum(axis = 1)
    columns = [ name + ': {:.0%}'.format(three_end) for (name , three_end)in zip(names , three_ends)]
    pypy.columns = columns
    index = [ name + ': {:.0%}'.format(five_end) for (name , five_end)in zip(names , five_ends)]
    pypy.index = index
    pypy.index.name = '5\'X'
    pypy.columns.name = '3\'Y'
    return pypy

1. Contruct sensitizer dataframe

In [60]:
#read file
filepath = '/Users/Chen/Documents/research/all data/data analysis/'
filename = filepath + '%s.csv' %('sensitizer_lane_singlebandnorm')
sensitizer = pd.read_csv(filename,skiprows = 1 ,names = ['4mers','acetone1','acetone2','acetone3','nfx1','nfx2','nfx3'])

#construct full table for sensitizer data
sensitizer["acetone mean"] = np.mean(sensitizer[["acetone1",'acetone2','acetone3']], axis = 1)
sensitizer["nfx mean"] = np.mean(sensitizer[["nfx1",'nfx2','nfx3']], axis = 1)
sensitizer["acetone std"] = np.std(sensitizer[["acetone1",'acetone2','acetone3']], axis = 1)
sensitizer["nfx std"] = np.std(sensitizer[["nfx1",'nfx2','nfx3']], axis = 1)
sensitizer['5end'] = sensitizer['4mers'].apply( lambda x: x[0])
sensitizer['3end'] = sensitizer['4mers'].apply( lambda x: x[-1])
sensitizer = sensitizer.sort(['5end','3end'], ascending=True)
sensitizer['CPD'] = sensitizer['4mers'].apply( lambda x: x[1:3])
sensitizer.style.format({
    'acetone1': '{:,.1%}'.format,
    'acetone2': '{:,.1%}'.format,
    'acetone3': '{:,.1%}'.format,
    'nfx1': '{:,.1%}'.format,
    'nfx2': '{:,.1%}'.format,
    'nfx3': '{:,.1%}'.format,
    'acetone mean': '{:,.1%}'.format,
    'nfx mean': '{:,.1%}'.format,
    })
/Users/Chen/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:13: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
Out[60]:
4mers acetone1 acetone2 acetone3 nfx1 nfx2 nfx3 acetone mean nfx mean acetone std nfx std 5end 3end CPD
4 ATTA 9.4% 19.3% 10.7% 4.8% 2.9% 3.1% 13.1% 3.6% 0.0438034 0.00849512 A A TT
13 ATTC 7.9% 3.8% 7.6% 7.1% 7.4% 6.9% 6.4% 7.2% 0.0189469 0.00224729 A C TT
11 ATTG 5.0% 1.8% 4.8% 11.9% 11.9% 14.6% 3.9% 12.8% 0.0144499 0.0125494 A G TT
3 ATTT 0.8% 1.2% 1.7% 1.9% 2.8% 1.8% 1.2% 2.2% 0.00381707 0.0041309 A T TT
12 CTTA 6.2% 3.1% 6.6% 10.6% 11.0% 9.6% 5.3% 10.4% 0.0157305 0.00555296 C A TT
8 CTTC 6.1% 7.3% 5.2% 2.0% 1.4% 1.6% 6.2% 1.7% 0.00881926 0.00218635 C C TT
7 CTTG 5.0% 6.3% 4.8% 3.6% 2.0% 2.0% 5.4% 2.5% 0.00652288 0.00727735 C G TT
6 CTTT 4.3% 6.8% 5.4% 5.3% 2.1% 8.3% 5.5% 5.2% 0.0103573 0.0252486 C T TT
14 GTTA 4.8% 2.2% 5.8% 9.3% 9.7% 8.8% 4.3% 9.2% 0.0150076 0.00370459 G A TT
15 GTTC 5.7% 2.6% 6.2% 2.3% 1.2% 1.8% 4.8% 1.8% 0.0159415 0.00448104 G C TT
0 GTTG 7.3% 2.8% 4.4% 7.3% 4.3% 2.7% 4.8% 4.7% 0.0188525 0.0193397 G G TT
10 GTTT 3.0% 1.8% 1.6% 1.1% 1.3% 1.5% 2.1% 1.3% 0.00619467 0.00135626 G T TT
5 TTTA 11.3% 11.6% 11.0% 15.0% 21.1% 18.9% 11.3% 18.4% 0.00281232 0.0251228 T A TT
9 TTTC 8.3% 9.5% 6.6% 3.6% 1.8% 2.8% 8.1% 2.8% 0.0121271 0.00750034 T C TT
1 TTTG 11.6% 17.7% 15.2% 10.4% 15.6% 13.2% 14.8% 13.0% 0.025302 0.0211609 T G TT
2 TTTT 3.2% 2.1% 2.6% 3.7% 3.6% 2.5% 2.6% 3.2% 0.00452555 0.00540667 T T TT

2. Plot barplot for acetone

In [78]:
max_y = 0.22
sensitizer = sensitizer.sort(["acetone mean"], ascending=False)

fig = plt.figure()
ax = fig.add_subplot(111)

## the data
N = 16 

## necessary variables
ind = np.arange(N)             # the x locations for the groups
width = 0.4                      # the width of the bars

## the bars
rects1 = ax.bar(ind+width, sensitizer["acetone mean"], width,
                color='black',
                yerr= sensitizer["acetone std"],
                error_kw=dict(elinewidth=0.5,ecolor='red'))
rects2 = ax.bar(ind+2*width, sensitizer["nfx mean"], width,
                color='red',
                yerr= sensitizer["nfx std"],
                error_kw=dict(elinewidth=0.5,ecolor='black'))

# ## the bars
# rects1 = ax.bar(ind, origin_mean, width,
#                 color='black',
#                 yerr=origin_std,
#                 error_kw=dict(elinewidth=0.5,ecolor='red'))

# rects2 = ax.bar(ind+width, quantity_mean, width,
#                     color='red',
#                     yerr= quantity_std,
#                     error_kw=dict(elinewidth=0.5,ecolor='black'))

# axes and labels
ax.set_xlim(0,len(ind)+width)
ax.set_ylim(0, max_y)
ax.set_ylabel('normalized relative Qty', fontsize = 20)
ax.set_title("Photosensitizer Effect on TT CPD Distribution", fontsize = 20)
    
xTickMarks = sensitizer['4mers']
ax.set_xticks(ind+2*width)
xtickNames = ax.set_xticklabels(xTickMarks)  
plt.setp(xtickNames, rotation=90)
    
vals = ax.get_yticks()
ax.set_yticklabels(['{:.0f}%'.format(x*100) for x in vals])

plt.tick_params(axis='both', which='major', labelsize=14)
plt.tick_params(axis='both', which='minor', labelsize=14)

## add a legend
ax.legend( (rects1[0], rects2[0]), ("acetone ","norfloxacin"), fontsize = 14 )

filename = "sensitizer"
plt.savefig(filepath + '%s_barplot.pdf'%(filename), format = 'pdf',dpi=300) 
/Users/Chen/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:2: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  from ipykernel import kernelapp as app
In [ ]:
 

2. Plot heatmap for sensitizer samples

In [180]:
max_y = 0.2
acetone = transform_df_sensitizer(sensitizer, 'TT', 'acetone')
nfx = transform_df_sensitizer(sensitizer, 'TT', 'nfx')
fig = plt.figure(figsize=(32,24 ))
i = 0
fig.subplots_adjust(wspace=.3 )

all_data = [acetone, nfx]    
names = ["2.7M acetone + 20s NB UVB","300uM Norfloxacin + 30min UVA"]
for data in all_data :
    sns.set(font_scale=3.5)
    ax = fig.add_subplot(221 + i)
    ax = sns.heatmap(data,vmin=0, vmax=max_y, fmt = '.0%',annot=True,  annot_kws={"size":40})
    total = np.sum(np.sum(data))
    ax.set_title(names[i] + '\n TT : {:.0%}'.format(total), fontsize = 40)
    i=i+1
        

#filename = 'sensitizer effect'
filename = 'sensitizer'
plt.savefig(filepath + '%s_HEATMAP.pdf'%(filename), format = 'pdf',dpi=300)  
/Users/Chen/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:3: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  app.launch_new_instance()
/Users/Chen/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:3: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) instead
  app.launch_new_instance()
In [ ]: