''' Utility functions '''
import torch
import numpy
import numpy as np
import os
import os.path as osp
import pickle
import argparse
from scipy.stats import ortho_group
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
#import seaborn as sns
#import pandas as pd
import pdb

#parse the configs from the config file
def read_config():
    with open('config', 'r') as file:
        lines = file.readlines()
    name2config = {}
    for line in lines:
        if line[0] == '#' or '=' not in line:
            continue
        line_l = line.split('=')
        name2config[line_l[0].strip()] = line_l[1].strip()
    m = name2config
    if 'kahip_dir' not in m or 'data_dir' not in m or 'glove_dir' not in m or 'sift_dir' not in m:
        raise Exception('Config must have kahip_dir, data_dir, glove_dir, and sift_dir')
    return name2config

name2config = read_config()
device = 'cuda' if torch.cuda.is_available() else 'cpu'
kahip_dir = name2config['kahip_dir']
graph_file = 'knn.graph'
data_dir = name2config['data_dir']
parts_path = osp.join(data_dir, 'partition', '')
dsnode_path = osp.join(data_dir, 'train_dsnode')
glove_dir = name2config['glove_dir']
sift_dir = name2config['sift_dir']
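#For reference, a minimal example of the plain-text 'config' file that read_config() parses:
#one 'name = value' pair per line, lines starting with '#' are ignored. The paths below are
#placeholders only; point them at the local KaHIP build and data directories.
#
#    kahip_dir = /path/to/KaHIP
#    data_dir = /path/to/data
#    glove_dir = /path/to/glove
#    sift_dir = /path/to/sift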
#starter numbers
N_CLUSTERS = 256 #16
N_HIDDEN = 512
#for reference, this is 128 for sift, 784 for mnist, and 100 for glove
N_INPUT = 128

'''
One unified parse_args to ensure consistency across different components.
Returns opt.
'''
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--n_clusters', default=N_CLUSTERS, type=int, help='number of clusters')
    parser.add_argument('--kahip_config', default='strong', help='fast, eco, or strong')
    parser.add_argument('--parts_path_root', default=parts_path, help='path root to partition')
    parser.add_argument('--dsnode_path', default=dsnode_path, help='path to datanode dsnode for training')
    parser.add_argument('--k', default=10, type=int, help='number of neighbors during training')
    parser.add_argument('--k_graph', default=10, type=int, help='number of neighbors to construct knn graph')
    parser.add_argument('--subsample', default=1, type=int, help='subsample frequency, 1 means original dataset')
    #parser.add_argument('--nn_graph50', default=True, help='Whether to use 50NN graph for partitioning')
    parser.add_argument('--nn_mult', default=5, type=int, help='multiplier for opt.k to create distribution of bins of nearest neighbors during training. For MLCE loss.')
    parser.add_argument('--data_dir', default=data_dir, help='data dir')
    parser.add_argument('--graph_file', default=graph_file, help='file to store knn graph')
    parser.add_argument('--glove', default=False, help='whether using glove data')
    parser.add_argument('--glove_c', default=True, help='whether using catalyzer-processed glove data')
    parser.add_argument('--sift_c', default=False, help='whether using catalyzer-processed SIFT data')
    parser.add_argument('--sift', default=False, help='whether using SIFT data')
    parser.add_argument('--prefix10m', default=False, help='whether using prefix10m data')
    parser.add_argument('--fast_kmeans', default=False, help='whether using fast kmeans, non-sklearn')
    parser.add_argument('--itq', default=False, help='whether using ITQ solver')
    parser.add_argument('--cplsh', default=True, help='whether using cross polytope LSH solver')
    parser.add_argument('--pca', default=False, help='whether using PCA solver')
    parser.add_argument('--st', default=False, help='whether using ST (search tree) solver')
    parser.add_argument('--rp', default=False, help='whether using random projection solver')
    parser.add_argument('--kmeans_use_kahip_height', default=-2, type=int, help='height if kmeans using kahip height, i.e. for combining kahip+kmeans methods')
    parser.add_argument('--compute_gt_nn', default=False, help='whether to compute ground truth for dataset points, i.e. ground truth partitions instead of learned ones, as if everything were partitioned by kahip')
    #meta and more hyperparameters
    parser.add_argument('--write_res', default=True, help='whether to write acc and probe count results for kmeans')
    parser.add_argument('--normalize_data', default=False, help='whether to normalize input data')
    #parser.add_argument('--normalize_feature', default=True, help='whether to scale features')
    parser.add_argument('--max_bin_count', default=70, type=int, help='max bin count for kmeans') #default=160
    parser.add_argument('--acc_thresh', default=0.95, type=float, help='acc threshold for kmeans')
    parser.add_argument('--n_repeat_km', default=3, type=int, help='number of experimental repeats for kmeans')
    #params for training
    parser.add_argument('--n_input', default=N_INPUT, type=int, help='dimension of neural net input')
    parser.add_argument('--n_hidden', default=N_HIDDEN, type=int, help='hidden dimension')
    parser.add_argument('--n_class', default=N_CLUSTERS, type=int, help='number of classes for training')
    parser.add_argument('--n_epochs', default=1, type=int, help='number of epochs for training') #35
    parser.add_argument('--lr', default=0.0008, type=float, help='learning rate')
    opt = parser.parse_args()

    if opt.glove:
        opt.n_input = 100
    elif opt.glove_c:
        opt.n_input = 100
    elif opt.sift or opt.sift_c:
        opt.n_input = 128
    elif opt.prefix10m:
        opt.n_input = 96
    else:
        opt.n_input = 784 #for mnist

    #sanity checks on flag combinations
    if (opt.glove or opt.glove_c) and not opt.normalize_data:
        print('GloVe data must be normalized! Setting normalize_data to True...')
        opt.normalize_data = True
    if opt.glove and opt.sift:
        raise Exception('Must choose only one of opt.glove and opt.sift!')
    if not opt.fast_kmeans ^ opt.itq:
        #raise Exception('Must choose only one of opt.fast_kmeans and opt.itq!')
        print('NOTE: fast_kmeans and itq options share the same value')
    if not opt.fast_kmeans:
        print('NOTE: fast_kmeans not enabled')
    return opt
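#Usage sketch (illustrative): a typical driver would do something like
#
#    opt = parse_args()
#    dataset = load_glove_data('train', opt) if opt.glove else load_sift_data('train')
#
#Note that the dataset/solver switches (--glove, --sift, --cplsh, ...) are declared without
#type=bool, so any non-empty command-line value, including 'False', parses as truthy.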
class NestedList:
    def __init__(self):
        self.master = {}

    def add_list(self, l, idx):
        if not isinstance(l, list):
            raise Exception('Must add list to NestedList!')
        self.master[idx] = l

    def get_list(self, idx):
        return self.master[idx]

'''
l2 normalize along last dim
Input: torch tensor.
'''
def normalize(vec):
    norm = vec.norm(p=2, dim=-1, keepdim=True)
    return vec/norm

def normalize_np(vec):
    norm = numpy.linalg.norm(vec, axis=-1, keepdims=True)
    return vec/norm

'''
Cross polytope LSH.
To find the part: random rotation followed by picking the nearest spherical lattice point,
i.e. argmax over coordinates, not up to sign.
Input:
-X: data, 2D tensor.
-n_clusters: number of parts, must be divisible by 2.
Returns the projection matrix M and the part index for each point.
'''
def polytope_lsh(X, n_clusters):
    #random orthogonal rotation
    M = torch.from_numpy(ortho_group.rvs(X.size(-1))).to(X.dtype)
    proj_dim = n_clusters // 2
    M = M[:, :proj_dim]
    X = torch.mm(X, M)
    max_idx = torch.argmax(X.abs(), dim=-1)
    max_entries = torch.gather(X, dim=-1, index=max_idx.unsqueeze(-1)).squeeze(-1)
    #negative coordinates map to the opposite lattice vertex
    max_idx[max_entries < 0] = -max_idx[max_entries < 0]
    max_idx += proj_dim
    return M, max_idx.view(-1)

'''
Get ranking using cross polytope info.
Input:
-q: query input, 2D tensor.
-M: projection mx, 2D tensor, d x n_total_clusters/2.
-n_bins: number of top directions to return per query.
'''
def polytope_rank(q, M, n_bins):
    q = torch.mm(q, M)
    n_queries, d = q.size(0), q.size(-1)
    q = q.unsqueeze(1)
    bases = torch.eye(d, device=device)
    bases = torch.cat((bases, -bases), dim=0)
    bases_exp = bases.unsqueeze(0).expand(n_queries, 2*d, d)
    #inner product with each signed basis vector in the last dimension, then rank
    idx = torch.topk((bases_exp*q).sum(-1), k=n_bins, dim=-1)
    return idx
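#Quick usage sketch for the cross-polytope helpers above (illustrative only, not from the
#original pipeline):
#
#    X = normalize(torch.randn(1000, 100))          #unit-norm data
#    M, parts = polytope_lsh(X, n_clusters=16)      #M: 100 x 8 projection, parts: (1000,) bucket ids
#    q = normalize(torch.randn(5, 100)).to(device)
#    vals, top_parts = polytope_rank(q, M.to(device), n_bins=4)  #top 4 signed-basis directions per query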
'''
Compute histogram of distances to the mth neighbor, normalized by the nearest neighbor distance.
Useful for e.g. after catalyzer processing.
Input:
-X: data
-q: queries
-m: the mth neighbor to take distance to.
'''
def plot_dist_hist(X, q, m, data_name):
    dist = l2_dist(q, X)
    dist, ranks = torch.topk(dist, k=m, dim=-1, largest=False)
    #normalize by the distance to the nearest neighbor
    dist = dist / dist[:, 0].unsqueeze(-1)
    mth_dist = dist[:, m-1]
    plt.hist(mth_dist.cpu().numpy(), bins=100, label=str(m)+'th neighbor')
    plt.xlabel('distance')
    plt.ylabel('count')
    plt.xlim(0, 4)
    plt.ylim(0, 140)
    plt.title('Dist to {}th nearest neighbor'.format(m))
    plt.grid(True)
    fig_path = osp.join(data_dir, '{}_dist_{}_hist.jpg'.format(data_name, m))
    plt.savefig(fig_path)
    print('fig saved {}'.format(fig_path))
    #pdb.set_trace()
    return mth_dist, plt

'''
Plot distance scatter plot, *up to* the mth neighbor, normalized by nearest neighbor dist.
'''
def plot_dist_hist_upto(X, q, m, data_name):
    #seaborn and pandas are only needed here, so import lazily
    import pandas as pd
    import seaborn as sns
    dist = l2_dist(q, X)
    dist, ranks = torch.topk(dist, k=m, dim=-1, largest=False)
    dist = dist / dist[:, 0].unsqueeze(-1)
    m_dist = dist[:, :m]
    m_dist = m_dist.mean(0)
    df = pd.DataFrame({'k': list(range(m)), 'dist': m_dist.cpu().numpy()})
    fig = sns.scatterplot(x='k', y='dist', data=df, label=data_name)
    fig.figure.legend()
    fig.set_title('{}: distance wrt k up to {}'.format(data_name, m))
    fig_path = osp.join(data_dir, '{}_dist_upto{}.jpg'.format(data_name, m))
    fig.figure.savefig(fig_path)
    print('figure saved under {}'.format(fig_path))

'''
Type can be query, train, or answers.
'''
def load_data_dep(type='query'):
    if type == 'query':
        return torch.from_numpy(np.load(osp.join(data_dir, 'queries_unnorm.npy')))
    elif type == 'answers':
        #answers are NN of the query points
        return torch.from_numpy(np.load(osp.join(data_dir, 'answers_unnorm.npy')))
    elif type == 'train':
        return torch.from_numpy(np.load(osp.join(data_dir, 'dataset_unnorm.npy')))
    else:
        raise Exception('Unsupported data type')

'''
All data are normalized.
glove_dir : '~/partition/glove-100-angular/normalized'
'''
def load_glove_data(type='query', opt=None):
    if type == 'query':
        return torch.from_numpy(np.load(osp.join(data_dir, 'glove_queries.npy')))
    elif type == 'answers':
        #answers are NN of the query points
        return torch.from_numpy(np.load(osp.join(data_dir, 'glove_answers.npy')))
    elif type == 'train':
        data = torch.from_numpy(np.load(osp.join(data_dir, 'glove_dataset.npy')))
        if opt is not None and opt.subsample > 1:
            #load subsampled indices; path intentionally left unset
            sub_idx = torch.load(' ')
            data = data[sub_idx]
        return data
    else:
        raise Exception('Unsupported data type')

def load_glove_sub_data(type='query', opt=None):
    if type == 'query':
        return torch.from_numpy(np.load(osp.join(data_dir, 'glove_queries.npy')))
    elif type == 'answers':
        #answers are NN of the query points, recomputed against the subsampled dataset
        sub_idx = torch.load('data/sub10_glove_idx.pt')
        data = torch.from_numpy(np.load(osp.join(data_dir, 'glove_dataset.npy')))
        data = data[sub_idx]
        query = torch.from_numpy(np.load(osp.join(data_dir, 'glove_queries.npy')))
        answers = dist_rank(query, k=10, data_y=data)
        #return torch.from_numpy(np.load(osp.join(data_dir, 'glove_answers.npy')))
        return answers
    elif type == 'train':
        data = torch.from_numpy(np.load(osp.join(data_dir, 'glove_dataset.npy')))
        #always use the subsampled indices here
        sub_idx = torch.load('data/sub10_glove_idx.pt')
        data = data[sub_idx]
        return data
    else:
        raise Exception('Unsupported data type')

'''
Catalyzer-processed glove data.
'''
def load_glove_c_data(type='query'):
    if type == 'query':
        return torch.from_numpy(np.load(osp.join(data_dir, 'glove_c0.08_queries.npy')))
    elif type == 'answers':
        #answers are NN of the query points
        return torch.from_numpy(np.load(osp.join(data_dir, 'glove_answers.npy')))
    elif type == 'train':
        return torch.from_numpy(np.load(osp.join(data_dir, 'glove_c0.08_dataset.npy')))
    else:
        raise Exception('Unsupported data type')
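#Illustrative loader usage (not from the original pipeline): all loaders share the same
#interface, e.g. for the plain GloVe set
#
#    dataset = load_glove_data('train', opt)     #float tensor of shape (n_points, 100)
#    queries = load_glove_data('query', opt)
#    answers = load_glove_data('answers', opt)   #indices of each query's nearest neighbors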
def load_sift_c_data(type='query'):
    if type == 'query':
        return torch.from_numpy(np.load(osp.join(data_dir, 'sift_c_queries.npy')))
    elif type == 'answers':
        #answers are NN of the query points
        return torch.from_numpy(np.load(osp.join(data_dir, 'sift_answers.npy')))
    elif type == 'train':
        return torch.from_numpy(np.load(osp.join(data_dir, 'sift_c_dataset.npy')))
    else:
        raise Exception('Unsupported data type')

'''
SIFT data; these files are the unnormalized versions.
'''
def load_sift_data(type='query'):
    if type == 'query':
        return torch.from_numpy(np.load(osp.join(data_dir, 'sift_queries_unnorm.npy')))
    elif type == 'answers':
        #answers are NN of the query points
        return torch.from_numpy(np.load(osp.join(data_dir, 'sift_answers_unnorm.npy')))
    elif type == 'train':
        return torch.from_numpy(np.load(osp.join(data_dir, 'sift_dataset_unnorm.npy')))
    else:
        raise Exception('Unsupported data type')

'''
prefix10m data.
'''
def load_prefix10m_data(type='query', opt=None):
    if type == 'query':
        return torch.from_numpy(np.load(osp.join(data_dir, 'prefix10m_queries.npy')))
    elif type == 'answers':
        #answers are NN of the query points
        return torch.from_numpy(np.load(osp.join(data_dir, 'prefix10m_answers.npy')))
    elif type == 'train':
        data = torch.from_numpy(np.load(osp.join(data_dir, 'prefix10m_dataset.npy')))
        if opt is not None and opt.subsample > 1:
            #load subsampled indices; path intentionally left unset
            sub_idx = torch.load(' ')
            data = data[sub_idx]
        return data
    else:
        raise Exception('Unsupported data type')

'''
Path to the precomputed glove partitioning.
Input:
-n_parts: number of parts.
'''
def glove_top_parts_path(n_parts, opt):
    if n_parts not in [2, 4, 8, 16, 32, 64, 128, 256, 512]:
        raise Exception('Glove partitioning has not been precomputed for {} parts.'.format(n_parts))
    if opt.subsample > 1:
        return osp.join(data_dir, 'partition', '16strongglove0ht1_sub10')
        ##return osp.join(glove_dir, 'partition_{}_{}'.format(n_parts, strength), 'partition{}.txt'.format(opt.subsample))
    strength = 'strong' #'eco' if n_parts in [128, 256] else 'strong'
    if opt.k_graph == 10:
        glove_top_parts_path = osp.join(glove_dir, 'partition_{}_{}'.format(n_parts, strength), 'partition.txt')
    elif opt.k_graph == 50:
        glove_top_parts_path = osp.join(glove_dir, '50', 'partition_{}_{}'.format(n_parts, strength), 'partition.txt')
    else:
        raise Exception('knn graph for k={} not supported'.format(opt.k_graph))
    return glove_top_parts_path

'''
Path to the precomputed SIFT partitioning.
Input:
-n_parts: number of parts.
'''
def sift_top_parts_path(n_parts, opt):
    if n_parts not in [2, 4, 8, 16, 32, 64, 128, 256]:
        raise Exception('SIFT partitioning has not been precomputed for {} parts.'.format(n_parts))
    #strength = 'eco' if n_parts in [128, 256] else 'strong'
    strength = 'strong'
    if opt.k_graph == 10:
        sift_top_parts_path = osp.join(data_dir, 'partition_{}_{}'.format(n_parts, strength), 'partition.txt')
    elif opt.k_graph == 50:
        raise Exception('50-NN graph partitioning not available for SIFT')
    else:
        raise Exception('knn graph for k={} not supported'.format(opt.k_graph))
    return sift_top_parts_path

def prefix10m_top_parts_path(n_parts, opt):
    if n_parts not in [8]:
        raise Exception('prefix10m partitioning has not been precomputed for {} parts.'.format(n_parts))
    #strength = 'eco' if n_parts in [128, 256] else 'strong'
    strength = 'strong'
    if opt.k_graph == 10:
        prefix10m_parts_path = osp.join(data_dir, 'partition_{}_{}'.format(n_parts, strength), 'prefix10m_partition.txt')
    else:
        raise Exception('knn graph for k={} not supported'.format(opt.k_graph))
    return prefix10m_parts_path
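#For example, with n_parts=16, opt.k_graph=10, no subsampling and the default 'strong'
#setting, glove_top_parts_path resolves to
#
#    <glove_dir>/partition_16_strong/partition.txt
#
#i.e. the precomputed KaHIP partition output (one part id per dataset point).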
'''
Memory-friendly computation of the ranks of the closest points, excluding the point itself
by default. Uses l2 dist; uses cosine dist if data are normalized.
Input:
-data_x: query points, tensor or numpy array.
-data_y: data to search in; defaults to data_x.
-k: number of top results to return.
-largest: whether to pick the largest values when ranking.
-include_self: whether to include the point itself in the final ranking.
'''
def dist_rank(data_x, k, data_y=None, largest=False, opt=None, include_self=False):
    if isinstance(data_x, np.ndarray):
        data_x = torch.from_numpy(data_x)
    if data_y is None:
        data_y = data_x
    elif isinstance(data_y, np.ndarray):
        data_y = torch.from_numpy(data_y)
    k0 = k
    device_o = data_x.device
    data_x = data_x.to(device)
    data_y = data_y.to(device)
    (data_x_len, dim) = data_x.size()
    data_y_len = data_y.size(0)
    #break into chunks to bound memory use; values tuned empirically
    if data_y_len > 990000:
        chunk_sz = 90 #larger chunks run out of memory beyond ~1 mil points
    else:
        chunk_sz = 3000
    if k+1 > len(data_y):
        k = len(data_y) - 1
    if device == 'cuda':
        dist_mx = torch.cuda.LongTensor(data_x_len, k+1)
    else:
        dist_mx = torch.LongTensor(data_x_len, k+1)
    data_normalized = True if opt is not None and opt.normalize_data else False
    largest = largest or data_normalized
    #compute l2 dist <-- be memory efficient by blocking
    total_chunks = int((data_x_len-1) // chunk_sz) + 1
    y_t = data_y.t()
    if not data_normalized:
        y_norm = (data_y**2).sum(-1).view(1, -1)
    del data_y
    for i in range(total_chunks):
        base = i*chunk_sz
        upto = min((i+1)*chunk_sz, data_x_len)
        x = data_x[base : upto]
        if not data_normalized:
            x_norm = (x**2).sum(-1).view(-1, 1)
            #the plus op broadcasts: ||x||^2 + ||y||^2 - 2 x.y
            dist = x_norm + y_norm
            dist -= 2*torch.mm(x, y_t)
            del x_norm
        else:
            dist = -torch.mm(x, y_t)
        topk = torch.topk(dist, k=k+1, dim=1, largest=largest)[1]
        dist_mx[base:upto, :k+1] = topk
        del dist
        del x
        if i % 500 == 0:
            print('chunk ', i)
    topk = dist_mx
    if k > 3 and opt is not None and opt.sift:
        #sift contains duplicate points, don't run this in general:
        #if a point shows up as its own 2nd/3rd/4th neighbor (a duplicate), swap in the top neighbor
        identity_ranks = torch.LongTensor(range(len(topk))).to(topk.device)
        topk_0 = topk[:, 0]
        topk_1 = topk[:, 1]
        topk_2 = topk[:, 2]
        topk_3 = topk[:, 3]
        id_idx1 = topk_1 == identity_ranks
        id_idx2 = topk_2 == identity_ranks
        id_idx3 = topk_3 == identity_ranks
        if torch.sum(id_idx1).item() > 0:
            topk[id_idx1, 1] = topk_0[id_idx1]
        if torch.sum(id_idx2).item() > 0:
            topk[id_idx2, 2] = topk_0[id_idx2]
        if torch.sum(id_idx3).item() > 0:
            topk[id_idx3, 3] = topk_0[id_idx3]
    if not include_self:
        topk = topk[:, 1:]
    elif topk.size(-1) > k0:
        topk = topk[:, :-1]
    topk = topk.to(device_o)
    return topk

'''
Expected distance between a point and its k nearest neighbors, and between a point and a
uniformly random other point.
'''
def compute_alpha_beta(data_x, k):
    data_y = data_x
    data_x_len = len(data_x)
    mean_dist_a = torch.zeros(len(data_x), device=device)
    mean_dist_b = torch.zeros(len(data_x), device=device)
    batch_sz = 700
    y_norm = (data_y**2).sum(-1).unsqueeze(0)
    data_y = data_y.t()
    for i in range(0, data_x_len, batch_sz):
        j = min(data_x_len, i+batch_sz)
        x = data_x[i : j]
        x_norm = (x**2).sum(-1).unsqueeze(-1)
        cur_dist = x_norm + y_norm - 2 * torch.mm(x, data_y)
        del x_norm
        del x
        #top distances include the 0 distance to the point itself
        top_dist, _ = torch.topk(cur_dist, k+1, largest=False)
        mean_dist_a[i:j] = (top_dist/k).sum(-1)
        mean_dist_b[i:j] = (cur_dist/(data_x_len-1)).sum(-1)
    return mean_dist_a.mean(), mean_dist_b.mean()
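#Sanity-check sketch for the blocked distance computation above (illustrative; assumes a
#small random dataset, and exact ties may cause occasional index mismatches):
#
#    x = torch.randn(50, 16)
#    nbrs = dist_rank(x, k=5)                                   #(50, 5) neighbor indices, self excluded
#    ref = torch.cdist(x, x).topk(6, largest=False)[1][:, 1:]   #direct computation for comparison
#    assert (nbrs == ref).float().mean() > 0.99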
'''
Compute the degree distribution, i.e. for each point, how many points have it as one of
their near neighbors.
'''
def compute_degree_distr(data_x, k):
    data_y = data_x
    data_x_len = len(data_x)
    batch_sz = 700
    y_norm = (data_y**2).sum(-1).unsqueeze(0)
    data_y = data_y.t()
    degrees = torch.zeros(data_x_len, device=device)
    for i in range(0, data_x_len, batch_sz):
        j = min(data_x_len, i+batch_sz)
        x = data_x[i : j]
        x_norm = (x**2).sum(-1).unsqueeze(-1)
        cur_dist = x_norm + y_norm - 2 * torch.mm(x, data_y)
        del x_norm
        del x
        #top distances include the 0 distance to the point itself
        top_dist, ranks = torch.topk(cur_dist, k+1, largest=False)
        ones = torch.ones(j-i, k+1, device=device)
        degrees = torch.scatter_add(degrees, dim=0, index=ranks.view(-1), src=ones.view(-1))
    #histogram over degrees: distribution[d-1] = number of points with degree d
    distribution = torch.zeros(data_x_len//3, device=device)
    ones = torch.ones(data_x_len, device=device)
    distribution = torch.scatter_add(distribution, dim=0, index=(degrees-1).long(), src=ones)
    #pdb.set_trace()
    return distribution
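#Illustrative use of compute_degree_distr (a sketch; the hard-coded data_x_len//3 histogram
#length assumes no point appears in more than n/3 neighbor lists):
#
#    x = torch.randn(3000, 32, device=device)
#    distr = compute_degree_distr(x, k=10)
#    #distr[d-1] = number of points whose k-NN in-degree is d (the self edge is counted,
#    #since topk keeps the point itself at distance 0)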
'''
Pairwise l2 distances, memory-friendly.
Input:
-data_x: tensor.
-data_y: if None, take distances from data_x to itself.
'''
def l2_dist(data_x, data_y=None):
    if data_y is not None:
        return _l2_dist2(data_x, data_y)
    else:
        return _l2_dist1(data_x)

'''
Memory-friendly pairwise distances for when GPU memory is insufficient. To be combined with
_l2_dist2 later.
Input:
-data: tensor.
'''
def _l2_dist1(data):
    if isinstance(data, numpy.ndarray):
        data = torch.from_numpy(data)
    (data_len, dim) = data.size()
    #break into chunks. 5e6 is the total budget for MNIST-sized points
    chunk_sz = max(1, int(5e6 // data_len))
    dist_mx = torch.FloatTensor(data_len, data_len)
    #compute l2 dist <-- be memory efficient by blocking
    total_chunks = int((data_len-1) // chunk_sz) + 1
    y_t = data.t()
    y_norm = (data**2).sum(-1).view(1, -1)
    for i in range(total_chunks):
        base = i*chunk_sz
        upto = min((i+1)*chunk_sz, data_len)
        x = data[base : upto]
        x_norm = (x**2).sum(-1).view(-1, 1)
        #the plus op broadcasts
        dist_mx[base:upto] = x_norm + y_norm - 2*torch.mm(x, y_t)
    return dist_mx

'''
Memory-friendly pairwise distances between two sets of points.
Input:
-data_x, data_y: tensors.
'''
def _l2_dist2(data_x, data_y):
    (data_x_len, dim) = data_x.size()
    data_y_len = data_y.size(0)
    #break into chunks. 5e6 is the total budget for MNIST-sized points
    chunk_sz = max(1, int(5e6 // data_y_len))
    dist_mx = torch.FloatTensor(data_x_len, data_y_len)
    #compute l2 dist <-- be memory efficient by blocking
    total_chunks = int((data_x_len-1) // chunk_sz) + 1
    y_t = data_y.t()
    y_norm = (data_y**2).sum(-1).view(1, -1)
    for i in range(total_chunks):
        base = i*chunk_sz
        upto = min((i+1)*chunk_sz, data_x_len)
        x = data_x[base : upto]
        x_norm = (x**2).sum(-1).view(-1, 1)
        #the plus op broadcasts
        dist_mx[base:upto] = x_norm + y_norm - 2*torch.mm(x, y_t)
    return dist_mx

'''
Convert a numpy array or nested list to a markdown table.
Input:
-mx: numpy array (or doubly-nested list).
-row_label, col_label: row and column labels.
'''
def mx2md(mx, row_label, col_label):
    height, width = len(mx), len(mx[0])
    if height != len(row_label) or width != len(col_label):
        raise Exception('mx2md: height != len(row_label) or width != len(col_label)')
    l = ['-']
    l.extend([str(i) for i in col_label])
    rows = [l]
    rows.append(['---' for i in range(width+1)])
    for i, row in enumerate(mx):
        l = [str(row_label[i])]
        l.extend([str(j) for j in mx[i]])
        rows.append(l)
    md = '\n'.join(['|'.join(row) for row in rows])
    return md

'''
Convert multiple numpy arrays or nested lists of the same shape to a markdown table,
joining corresponding entries with ' / '.
Input:
-mx_l: list of numpy arrays (or doubly-nested lists).
-row_label, col_label: row and column labels.
'''
def mxs2md(mx_l, row_label, col_label):
    height, width = len(mx_l[0]), len(mx_l[0][0])
    for mx in mx_l:
        if (height, width) != (len(mx), len(mx[0])):
            raise Exception('mxs2md: all input matrices must have the same shape')
    if height != len(row_label) or width != len(col_label):
        raise Exception('mxs2md: height != len(row_label) or width != len(col_label)')
    l = ['-']
    l.extend([str(i) for i in col_label])
    rows = [l]
    rows.append(['---' for i in range(width+1)])
    for i in range(height):
        l = [str(row_label[i])]
        l.extend([' / '.join([str(mx_k[i][j]) for mx_k in mx_l]) for j in range(width)])
        rows.append(l)
    md = '\n'.join(['|'.join(row) for row in rows])
    return md

def load_lines(path):
    with open(path, 'r') as file:
        lines = file.read().splitlines()
    return lines

'''
Input: lines is a list of objects, not yet newline-terminated.
'''
def write_lines(lines, path):
    lines1 = []
    for line in lines:
        lines1.append(str(line) + os.linesep)
    with open(path, 'w') as file:
        file.writelines(lines1)

def pickle_dump(obj, path):
    with open(path, 'wb') as file:
        pickle.dump(obj, file)

def pickle_load(path):
    with open(path, 'rb') as file:
        return pickle.load(file)

if __name__ == '__main__':
    mx1 = np.zeros((2,2))
    mx2 = np.ones((2,2))
    row = ['1','2']
    col = ['3','4']
    print(mxs2md([mx1, mx2], row, col))
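#Running this module directly (with a valid 'config' file present) should print a small
#markdown table along the lines of
#
#    -|3|4
#    ---|---|---
#    1|0.0 / 1.0|0.0 / 1.0
#    2|0.0 / 1.0|0.0 / 1.0
#
#where each cell joins the corresponding entries of the input matrices with ' / '.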