# Python: import modules

import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate
from sklearn.datasets import make_classification

# Logistic regression

def l_regression(df):
    """Fit a logistic regression on one random split of ``df`` and score it.

    The ``group`` column is the target. Every column from position 2 to the
    end is used as a feature (presumably columns 0 and 1 are an id and the
    group label -- confirm against the CSV layout).

    Args:
        df: DataFrame containing a ``group`` column and feature columns
            starting at column index 2.

    Returns:
        The classifier's ``score`` on the held-out 30% of the rows.
    """
    Y = df['group']
    # Open-ended slice: all columns from index 2 through the last one.
    X = df.iloc[:, 2:]

    # No random_state is given, so every call draws a different split and
    # repeated calls yield a spread of scores.
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)

    log_clf = LogisticRegression(solver='lbfgs')
    log_clf.fit(X_train, Y_train)
    return log_clf.score(X_test, Y_test)

def plot_accuracy(bfile1,bfile2):
    """Plot the accuracy of 30 repeated logistic-regression fits per dataset.

    For each of the two files, ``./data1/<name>.csv`` is loaded,
    ``l_regression`` is run 30 times on it, and the 30 scores are drawn as
    one line. The legend entry for each line shows the file name and the
    mean score over all 30 runs, rounded to two decimals.

    Args:
        bfile1: Base name (without ``.csv``) of the first file in ``./data1/``.
        bfile2: Base name (without ``.csv``) of the second file in ``./data1/``.
    """
    for bfile in (bfile1, bfile2):
        df = pd.read_csv('./data1/' + bfile + '.csv')
        scoreList = [l_regression(df) for _ in range(30)]
        # Average over the whole list of runs, not just the final score.
        mean_score = round(np.mean(scoreList), 2)
        plt.plot(scoreList, label=bfile + ' : ' + str(mean_score))

    plt.legend()
    plt.ylim(0, 1)  # accuracy is a fraction, so fix the axis to [0, 1]
    plt.show()

# Hierarchical clustering heatmap

def plot_heatmap(bfile1,bfile2):
    """Show a clustered heatmap of the data in ``./data1/<bfile1>.csv``.

    The ``id`` column becomes the row index and the ``group`` column is
    dropped before the remaining columns are passed to ``sns.clustermap``.

    Args:
        bfile1: Base name (without ``.csv``) of the file in ``./data1/``.
        bfile2: Accepted but not used by this function.
    """
    csv_path = './data1/' + bfile1 + '.csv'
    table = pd.read_csv(csv_path)
    table = table.set_index('id')
    table = table.drop(columns='group')

    sns.set(font_scale=1)
    sns.clustermap(table, cmap='RdBu_r')
    plt.show()

# Correlation

def correlation(df):
    """Show a heatmap of the pairwise Pearson correlations of ``df``'s columns.

    The first column of ``df`` is skipped. The upper triangle of the
    correlation matrix, including the diagonal, is masked so each pair of
    columns appears only once.

    Args:
        df: DataFrame whose columns from index 1 onward are correlated.
    """
    corr = df.iloc[:, 1:].corr(method='pearson')
    # Use the builtin bool: the np.bool alias was removed in NumPy 1.24.
    mask = np.zeros_like(corr, dtype=bool)
    mask[np.triu_indices_from(mask)] = True
    cmap = sns.diverging_palette(220, 10, as_cmap=True)

    sns.heatmap(corr, mask=mask, cmap=cmap, annot=True, linewidths=1, linecolor='black')
    plt.show()

# Scatter plot

def model_regression(df,lst,cnt,color,lgd):
    """Scatter ``price`` against a reverse running total of selected columns.

    One subplot of a 3x3 grid is drawn per entry of ``lst``. For each entry
    the column's first ``cnt`` rows are accumulated from row ``cnt - 1``
    back to row 0, and the running totals are plotted against
    ``df['price']``.

    Args:
        df: DataFrame with a ``price`` column and one column per entry of
            ``lst``.
        lst: Column labels to plot. Each label is also used as a 1-based
            index into ``color`` and ``lgd``, so it must be an integer.
            At most 9 entries fit the 3x3 grid.
        cnt: Number of leading rows to accumulate. NOTE(review): the
            scatter needs as many totals as ``price`` values, so this
            presumably equals ``len(df)`` -- confirm with callers.
        color: Sequence of point colours, indexed by ``label - 1``.
        lgd: Sequence of legend labels, indexed by ``label - 1``.
    """
    fig, ax = plt.subplots(3, 3)

    for pos, col in enumerate(lst):
        series = df[col]
        # Running total from row cnt-1 back to row 0.
        totals = [series.iloc[cnt - 1]]
        for i in range(cnt - 2, -1, -1):
            totals.append(totals[-1] + series.iloc[i])

        panel = ax.flat[pos]
        panel.scatter(df['price'], totals, c=color[col - 1], label=lgd[col - 1],
                      marker='.', edgecolors='k', alpha=0.6, linewidths=0.8)
        panel.legend()

    plt.show()