#!/usr/bin/awk -f

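# input1: the dictionary of the original data
# input2: the dictionary of the imputed data
# input3: the imputed data (comma-separated)
# output: the imputed data, space-separated, in which every imputed value that
#         is absent from the original dictionary is renamed to a new,
#         increasing item id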

BEGIN {
    # Input fields are comma-separated.
    FS = ","

    # File names are taken positionally from the command line.
    original = ARGV[1]    # first file argument
    imputed  = ARGV[2]    # second file argument
    data     = ARGV[3]    # third file argument

    # Column counter; starts below zero so the first header line yields 0.
    col = -1

    # Item id counter; reassigned while the first file is read and
    # incremented for every new value found in the second file.
    maxItem = -1
}

# Header line of the original dictionary (any line containing '#'):
# open a new column and remember the header text under its 0-based index.
# The '#' is matched unescaped: "\#" is not a defined regexp escape and
# makes some awk implementations emit a warning.
FILENAME == original && /#/ {
    cols[++col] = $0
    next
}

# Every other line of the original dictionary is a value of the current column.
FILENAME == original {
    # Values of a column are accumulated in one space-separated string.
    values[col] = values[col] " " $0

    # Keep the numerically largest item seen rather than merely the last one,
    # so ids generated later cannot collide with an existing item when the
    # dictionary is not sorted in increasing order.
    if ($0 + 0 > maxItem)
        maxItem = $0 + 0
    next
}

# Entering the imputed dictionary: rewind the column counter so that its
# header lines are numbered from 0 again. There is deliberately no `next`,
# so this first line is still examined by the rules that follow.
FNR == 1 && FILENAME == imputed {
    col = -1
}

# Header line of the imputed dictionary (any line containing '#'):
# advance to the next column and rebuild tabValues, the set of values the
# original dictionary recorded for that column.
# The '#' is matched unescaped: "\#" is not a defined regexp escape and
# makes some awk implementations emit a warning.
FILENAME == imputed && /#/ {
    col++

    # Empty the set left over from the previous column.
    split("", tabValues)

    # values[col] is a space-separated string; a " " separator makes split
    # ignore the leading blank and break on runs of whitespace.
    split(values[col], tab, " ")
    for (t in tab)
        tabValues[tab[t]] = 1
    next
}

# Value line of the imputed dictionary: a value already present in the
# current column's set is left alone; any other value is given the next
# item id, keyed by "<1-based column>:<value>".
FILENAME == imputed {
    if ($0 in tabValues)
        next
    newValues[(col + 1) ":" $0] = ++maxItem
    next
}

# Data file: print each field followed by one space, substituting the new
# item id when "<field index>:<field>" was registered in newValues, and end
# each record with a newline. The first line gets no special treatment.
FILENAME == data {
    for (i = 1; i <= NF; i++) {
        key = i ":" $i
        if (key in newValues)
            printf("%d ", newValues[key])
        else
            printf("%s ", $i)
    }
    printf("\n")
}

# Nothing is done at end of input.
END {}