# knn.py - Simple k-nearest neighbour classifier
# COMP8400 assignment 2, 2009
#
# Author: Peter Christen, May 2009
# -----------------------------------------------------------------------------

import math  # Required Python modules
import sys

# ------------------------------------------------------------------------------
# First the functions of the three distance measures

def distMan(vec1, vec2):
  """Return the Manhattan (city-block) distance between two vectors.

  Both arguments must be sequences of the same length whose elements can
  be converted with float().  The result is the sum of the absolute
  element-wise differences, returned as a float (0.0 for empty vectors).
  """
  assert len(vec1) == len(vec2)

  total = 0.0
  for a, b in zip(vec1, vec2):  # Walk both vectors in lockstep
    total += abs(float(a) - float(b))

  return total

def distEuc(vec1, vec2):
  """Return the Euclidean (straight-line) distance between two vectors.

  Both arguments must be sequences of the same length whose elements can
  be converted with float().  The result is the square root of the sum of
  the squared element-wise differences (0.0 for empty vectors).
  """
  assert len(vec1) == len(vec2)

  sq_sum = 0.0
  for a, b in zip(vec1, vec2):  # Walk both vectors in lockstep
    diff = float(a) - float(b)
    sq_sum += diff * diff

  return math.sqrt(sq_sum)

def distCan(vec1, vec2):
  """Return the Canberra distance between two vectors.

  Both arguments must be sequences of the same length whose elements can
  be converted with float().  Each position contributes |a-b| / (|a|+|b|);
  positions where both values are zero would divide by zero and are
  therefore skipped, i.e. they contribute nothing to the distance.
  """
  assert len(vec1) == len(vec2)

  total = 0.0
  for a, b in zip(vec1, vec2):  # Walk both vectors in lockstep
    a = float(a)
    b = float(b)
    denom = abs(a) + abs(b)
    if (denom > 0.0):  # Only divide when at least one value is non-zero
      total += abs(a - b) / denom

  return total

# ------------------------------------------------------------------------------
# Main program: read the input file, then classify all test records
#
# Expected layout of the input file (one item per line):
#   1) k, the number of nearest neighbours to use
#   2) dim, the number of attributes of each record
#   3) name of the distance measure; only its first three letters are checked
#      ('euc', 'man' or 'can', in upper or lower case)
#   4) the number of training records
#   5) the training records: dim comma separated numbers followed by the class
#   6) the number of test records
#   7) the test records: dim comma separated numbers
#
# All print calls below use a single pre-formatted string so that the output
# is the same whether the script is run with Python 2 or Python 3.

def _read_int(in_fp):
  """Read the next line from in_fp and return its content as an integer."""
  return int(in_fp.readline().strip())


def _read_records(in_fp, num_rec, num_val, rec_type):
  """Read num_rec comma separated records of num_val numbers each.

  rec_type is only used to label error messages.  Returns a list with one
  list of floats per record.  Raises ValueError if a record does not hold
  exactly num_val values or if a value is not a valid number.
  """
  rec_list = []

  for rec_num in range(1, num_rec + 1):
    val_list = in_fp.readline().strip().split(',')
    if (len(val_list) != num_val):
      raise ValueError('%s record %d has %d values, expected %d' %
                       (rec_type, rec_num, len(val_list), num_val))
    rec_list.append([float(val.strip()) for val in val_list])

  return rec_list


def _classify(test_rec, train_data, k, dist_func):
  """Return the class (0 or 1) voted for by the k nearest training records.

  The last element of each training record is its class; every class value
  other than 1 is counted as a vote for class 0.  Class 0 is returned only
  if it has strictly more votes, so a tie is decided in favour of class 1.
  Raises ValueError if k is smaller than 1 or larger than the number of
  training records, as no k nearest neighbours exist in that case.
  """
  if not (1 <= k <= len(train_data)):
    raise ValueError('k must be between 1 and the number of training ' +
                     'records (%d), but is %d' % (len(train_data), k))

  # Sort on (distance, class) so the smallest distance comes first and equal
  # distances are ordered by their class value
  #
  neighbours = sorted((dist_func(test_rec, train_rec[:-1]), int(train_rec[-1]))
                      for train_rec in train_data)

  ones = sum(1 for (_, rec_class) in neighbours[:k] if rec_class == 1)
  zeros = k - ones

  if (zeros > ones):
    return 0
  return 1


def main(argv=None):
  """Run the k-NN classifier on the input file named in argv[1].

  argv defaults to sys.argv.  Exits with status 1 if no file name is given
  or if the file names an unknown distance measure.
  """
  if (argv is None):
    argv = sys.argv

  if (len(argv) < 2):
    sys.exit('Usage: knn.py <input_file>')

  dist_funcs = {'man': distMan, 'euc': distEuc, 'can': distCan}

  # Step 1: Read file and its content - - - - - - - - - - - - - - - - - - - - -
  #
  with open(argv[1]) as in_fp:  # File is closed even if reading fails
    k = _read_int(in_fp)
    print('k = %d' % k)

    dim = _read_int(in_fp)
    print('dim = %d' % dim)

    dist = in_fp.readline().strip().lower()
    if (dist[:3] not in dist_funcs):
      # Stop here rather than silently falling back to another measure
      print('Error: Illegale distance measure: %s' % dist)
      sys.exit(1)
    print('Distance: %s' % dist)
    print('')

    num_train = _read_int(in_fp)
    print('Number of training records = %d' % num_train)
    train_data = _read_records(in_fp, num_train, dim + 1, 'Training')

    num_test = _read_int(in_fp)
    print('Number of test records = %d' % num_test)
    test_data = _read_records(in_fp, num_test, dim, 'Test')

  print('')

  # Step 2: Select the chosen distance function - - - - - - - - - - - - - - - -
  #
  dist_func = dist_funcs[dist[:3]]

  # Step 3: Start k-NN classifier on all test records - - - - - - - - - - - - -
  #
  for test_rec in test_data:
    rec_class = _classify(test_rec, train_data, k, dist_func)
    print('Test record %s classified as "%d"' % (str(test_rec), rec_class))


if (__name__ == '__main__'):
  main()

# End --------------------------------------------------------------------------

