#!/bin/bash
#
# Test script that runs a Python script as a mapper executable on Hadoop. Make sure the Hadoop processes have been started first, e.g. run
#  /usr/local/hadoop/bin/start-all.sh
#
# Assumes there are images named *00.png and *01.png in ${test_images_folder};
#   copies them over to the HDFS folder ${hdfs_work_dir}/${hdfs_folder}

# Local directory that holds the *00.png and *01.png input images.
test_images_folder='.'

# Working directory on HDFS; usually the current user's home directory there.
hdfs_work_dir='/user/hduser'

# Name of the output folder on HDFS.
hdfs_folder='test_mapper'

# Mapper script. Expected to sit next to this bash script; give its full path
# here if it lives somewhere else.
mapper_script='./s5_correct_cams_mapper.py'

# Parameter of the process executed by the python mapper; its value is
# irrelevant for testing purposes.
exh_search_rad=5

#
#
# The rest sets up the paths and runs the scripts
#
#
# Derive the HDFS paths used by the job from the settings above. Every
# expansion is quoted so a value containing whitespace or glob characters is
# kept intact instead of being split or expanded by the shell.

# Absolute path of the job's folder on HDFS.
hdfs_folder_full_path="${hdfs_work_dir}/${hdfs_folder}"
# Text file that is given to the streaming job as its input: first relative to
# the HDFS working dir, then as an absolute HDFS path.
hdfs_mapper_stdin_file="${hdfs_folder}/stdin.txt"
hdfs_mapper_stdin_file_full_path="${hdfs_work_dir}/${hdfs_mapper_stdin_file}"
# Folder passed to the streaming job as its -output location.
hdfs_mapper_job_output_folder="${hdfs_folder_full_path}/out"
# This should be relative to the current dir the job will be submitted from.
hdfs_mapper_output_folder="${hdfs_folder}"

# Configuration file that is shipped with the job (via -file). It has two
# lines: the mapper output folder, then the search-radius parameter.
# printf is used instead of echo so values are written verbatim, and a single
# grouped redirect creates the file in one go.
conf_file="./conf_mapper.txt"
{
  printf '%s\n' "$hdfs_mapper_output_folder"
  printf '%s\n' "$exh_search_rad"
} > "$conf_file"

# Print a duration given in whole seconds ($1) as HH:MM:SS on stdout.
# Uses plain shell arithmetic, so the hour field keeps counting past 23
# instead of wrapping and no GNU-specific `date -d` is required.
format_elapsed() {
  local total=$1
  printf '%02d:%02d:%02d\n' \
    "$((total / 3600))" "$((total % 3600 / 60))" "$((total % 60))"
}

# Report a failed step on stderr ($1 = step description, $2 = exit status).
warn_failed() {
  printf 'warning: %s failed (exit status %s)\n' "$1" "$2" >&2
}

hadoop_bin="/usr/local/hadoop/bin/hadoop"
streaming_jar="/usr/local/hadoop/contrib/streaming/hadoop-streaming-0.20.203.0.jar"

# The steps below run in order on a best-effort basis: a failing step is
# reported on stderr, but the following steps (including the cleanup) still run.
START_TIME=$(date +%s)

# copy image files to cluster
python s3_copy_files_from_local.py "$test_images_folder" .png "$hdfs_folder" \
  || warn_failed "copying images to HDFS" "$?"

# prepare a text file with a pair of image paths to be registered on each line,
# portions of this file will be streamed to mappers
python s4_prepare_stdin_txt.py "$hdfs_folder" 00.png 01.png "$hdfs_mapper_stdin_file" \
  || warn_failed "preparing the mapper stdin file" "$?"

# submit hadoop job (map-only: the reducer is NONE); the mapper script and the
# configuration file are shipped along with the job via -file
"$hadoop_bin" jar "$streaming_jar" \
  -file "$mapper_script" \
  -mapper "$mapper_script" \
  -reducer NONE \
  -input "$hdfs_mapper_stdin_file_full_path" \
  -output "$hdfs_mapper_job_output_folder" \
  -file "$conf_file" \
  || warn_failed "hadoop streaming job" "$?"

# copy offset files back to local folder
python s3_copy_files_to_local.py "$hdfs_folder" _offsets.txt "$test_images_folder" \
  || warn_failed "copying offset files back" "$?"

# clean up the temp folders and files
"$hadoop_bin" dfs -rmr "$hdfs_folder_full_path" \
  || warn_failed "removing the HDFS job folder" "$?"
rm -- "$conf_file"

END_TIME=$(date +%s)

ELAPSED=$((END_TIME - START_TIME))
printf 'Elapsed time : %s\n' "$(format_elapsed "$ELAPSED")"
