Hadoop MapReduce program for word replace

Directory structure of WordReplace Program

WordReplaceDriver.java
package com.javatechnical.hadoop.WordReplace;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WordReplaceDriver implements Tool{
 
 Configuration conf;

 @Override
 public Configuration getConf() {
  // TODO Auto-generated method stub
  return conf;
 }

 @Override
 public void setConf(Configuration conf) {
  // TODO Auto-generated method stub
  this.conf=conf;
 }

 @Override
 public int run(String[] args) throws Exception {
  
  Job wordReplaceJob = new Job(conf);
  
  wordReplaceJob.setJobName("Word Replace Test");
  
  wordReplaceJob.setJarByClass(this.getClass());
  
  wordReplaceJob.setMapperClass(WordReplaceMapper.class);  
  
  wordReplaceJob.setNumReduceTasks(0);  
  
  
  wordReplaceJob.setMapOutputKeyClass(Text.class);
  
  wordReplaceJob.setMapOutputValueClass(NullWritable.class);
  
  
  wordReplaceJob.setInputFormatClass(TextInputFormat.class);
  
  wordReplaceJob.setOutputFormatClass(TextOutputFormat.class);
  
  Path inputPath = new Path(args[0]);
  
  Path outputPath = new Path(args[1]);
  
  FileInputFormat.addInputPath(wordReplaceJob, inputPath);
  
  FileOutputFormat.setOutputPath(wordReplaceJob, outputPath);
  
  FileSystem fileSystem = outputPath.getFileSystem(conf);
  
  fileSystem.delete(outputPath, true);
  
  int result = wordReplaceJob.waitForCompletion(true)?0:-1; 
  
  return result;
 }
 
 public static void main(String[] args) throws Exception {
  
  Configuration conf = new Configuration();
  
  conf.set("old.word","java");
  
  conf.set("new.word","kava"); 
  
  
  int status = ToolRunner.run(conf, new WordReplaceDriver(), args);
  
  System.out.println("Status : "+status);
  
 }

}
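Because the driver goes through ToolRunner, the two replacement words do not have to be hard-coded in main(); Hadoop's generic options parser also accepts them as -D properties on the command line. A minimal sketch of that variant (the jar name in the comment is only illustrative; old.word and new.word are the property names the mapper already reads):

 public static void main(String[] args) throws Exception {

  // Invoked as: hadoop jar wordreplace.jar <MainClass> -Dold.word=java -Dnew.word=kava <input> <output>
  // ToolRunner's GenericOptionsParser copies the -D properties into the Configuration,
  // so no conf.set(...) calls are needed here.
  int status = ToolRunner.run(new Configuration(), new WordReplaceDriver(), args);

  System.out.println("Status : " + status);
 }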
WordReplaceMapper.java
package com.javatechnical.hadoop.WordReplace;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordReplaceMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
 
 private final String OLD_WORD="old.word";
 
 private final String NEW_WORD="new.word";
 
 String oldWord;
 String newWord;
 
 @Override
 protected void setup(Context context)
   throws IOException, InterruptedException {
  
  Configuration conf = context.getConfiguration();
  
   oldWord = conf.get(OLD_WORD);
  
   newWord = conf.get(NEW_WORD); 
  
 }

 @Override
 protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
  
  
  String line = value.toString();

  if (line.contains(oldWord)) {
   line = line.replaceAll(oldWord, newWord);
  }

  context.write(new Text(line), NullWritable.get());

 }
}
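Note that String.replaceAll() treats oldWord as a regular expression and also rewrites it inside longer tokens (for example "javascript" would become "kavascript"). A minimal sketch of a literal, whole-word replacement for the same map() body, assuming the same oldWord and newWord fields:

  String line = value.toString();

  // Pattern.quote() escapes any regex metacharacters in oldWord, and \b anchors
  // the match at word boundaries so only whole words are replaced.
  line = line.replaceAll("\\b" + java.util.regex.Pattern.quote(oldWord) + "\\b",
    java.util.regex.Matcher.quoteReplacement(newWord));

  context.write(new Text(line), NullWritable.get());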
Input file is:
linux java hadoop dba
unix sqoop123 sql
mysql hibernet string class
abstract interface implements
implements inheritance java c++
linux java hadoop dba
unix sqoop123 sql
mysql hibernet string class
abstract interface implements
implements inheritance java c++
linux java hadoop dba
unix sqoop123 sql
mysql hibernet string class
abstract interface implements
implements inheritance java c++
linux java hadoop dba
unix sqoop123 sql
mysql hibernet string class
abstract interface implements
implements inheritance java c++
linux java hadoop dba
unix sqoop123 sql
mysql hibernet string class
abstract interface implements
implements inheritance java c++
output:
linux kava hadoop dba
unix sqoop123 sql
mysql hibernet string class
abstract interface implements
implements inheritance kava c++
linux kava hadoop dba
unix sqoop123 sql
mysql hibernet string class
abstract interface implements
implements inheritance kava c++
linux kava hadoop dba
unix sqoop123 sql
mysql hibernet string class
abstract interface implements
implements inheritance kava c++
linux kava hadoop dba
unix sqoop123 sql
mysql hibernet string class
abstract interface implements
implements inheritance kava c++
linux kava hadoop dba
unix sqoop123 sql
mysql hibernet string class
abstract interface implements
implements inheritance kava c++

MapReduce program for finding word length counts in Hadoop

Directory structure of Hadoop Word Length Count

WordLengthCountMapper.java
package com.javatechnical.hadoop.lengthcount;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordLengthCountMapper extends Mapper<LongWritable, Text, LongWritable, LongWritable> {

 @Override
 protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

  
  String line = value.toString();
  
 
  
  String[] words = line.split(" "); 
  
  
  
  for (String word : words) {
   
   context.write(new LongWritable(word.length()), new LongWritable(1));
   
  }
 
  
  
  
  
 }

}
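One caveat: splitting on a single space produces empty tokens whenever the input contains consecutive spaces, and each empty token would be counted as a word of length 0. A small sketch of a more defensive loop for the same map() method:

  // split on runs of whitespace and ignore empty tokens
  String[] words = line.split("\\s+");

  for (String word : words) {
   if (!word.isEmpty()) {
    context.write(new LongWritable(word.length()), new LongWritable(1));
   }
  }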
WordLengthCountReducer.java
package com.javatechnical.hadoop.lengthcount;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordLengthCountReducer extends Reducer<LongWritable, LongWritable, LongWritable, LongWritable> {

 @Override
 protected void reduce(LongWritable key, Iterable<LongWritable> values, Context context)
   throws IOException, InterruptedException {
  
  long sum=0;
  
  for (LongWritable value : values) {
   
   sum=sum+value.get();
   
  }
  context.write(key, new LongWritable(sum));
  
 }

}
WordLengthCountDriver.java
package com.javatechnical.hadoop.lengthcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;


public class WordLengthCountDriver implements Tool {
 
 Configuration  conf;

 @Override
 public Configuration getConf() {
  // TODO Auto-generated method stub
  return conf;
 }

 @Override
 public void setConf(Configuration conf) {
  this.conf=conf;
  
 }

 @Override
 public int run(String[] args) throws Exception {
  
  Job wordLengthCountJob = new Job(conf);
  
  wordLengthCountJob.setJobName("Word Length Count Test");
  
  wordLengthCountJob.setJarByClass(this.getClass());
  
  wordLengthCountJob.setMapperClass(WordLengthCountMapper.class);
  
  wordLengthCountJob.setReducerClass(WordLengthCountReducer.class);
  
  wordLengthCountJob.setMapOutputKeyClass(LongWritable.class);
  
  wordLengthCountJob.setMapOutputValueClass(LongWritable.class);
  
  wordLengthCountJob.setOutputKeyClass(LongWritable.class);
  
  wordLengthCountJob.setOutputValueClass(LongWritable.class);
  
  wordLengthCountJob.setInputFormatClass(TextInputFormat.class);
  
  wordLengthCountJob.setOutputFormatClass(TextOutputFormat.class);
  
  Path inputPath = new Path(args[0]);
  
  Path outputPath = new Path(args[1]);
  
  
  FileInputFormat.addInputPath(wordLengthCountJob, inputPath);
  
  FileOutputFormat.setOutputPath(wordLengthCountJob, outputPath);
  
  FileSystem fs = outputPath.getFileSystem(conf);
  
  fs.delete(outputPath, true);
  
  
  int result = wordLengthCountJob.waitForCompletion(true)?0:-1;
  
  
  return result;
 }
 
 public static void main(String[] args) throws Exception {
  
  Configuration  conf = new Configuration();
  
 int status = ToolRunner.run(conf, new WordLengthCountDriver(), args);
 
 System.out.println("Word Length Count Status " + status);
  
 }

}
Input file is:
linux java unix jsp servlet
hadoop pig sqoop hive hbase
java hadoop linux jsp html
linux sqoop html hive hbase
spring jsp hibernate linux java
sqoop hive servlet hadoop unix
linux java unix servlet
hadoop pig sqoop hive hbase
java hadoop linux jsp html
linux sqoop html hive hbase
output (word length, number of words with that length):
3 6
4 17
5 16
6 6
7 3
9 1

Word count program with MapReduce in Hadoop

Directory structure for Word count program in Hadoop

WordCountMapper.java
package com.javatechnical.WordCount;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
 @Override
 protected void map(LongWritable key, Text value,Context context)
   throws IOException, InterruptedException {
  String line=value.toString();
  String [] words=line.split(" ");
  for (String word : words) {
   context.write(new Text(word), new LongWritable(1));
  }
  
 }

}
WordCountReducer.java
package com.javatechnical.WordCount;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
 @Override
 protected void reduce(Text key, Iterable<LongWritable> values, Context context)
   throws IOException, InterruptedException {
  long sum=0;
  for (LongWritable value : values) {
   sum=sum+value.get();
  }
  context.write(key,new LongWritable(sum));
 }

}
WordCountDriver.java
package com.javatechnical.WordCount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WordCountDriver implements Tool {
 Configuration conf;

 @Override
 public Configuration getConf() {
  // TODO Auto-generated method stub
  return conf;
 }

 @Override
 public void setConf(Configuration conf) {
  this.conf=conf;
  
 }

 @Override
 public int run(String[] args) throws Exception {
  Job wordCountJob = new Job(conf);
  wordCountJob.setJobName("Word Count Job");
  wordCountJob.setJarByClass(getClass());
  wordCountJob.setMapperClass(WordCountMapper.class);
  wordCountJob.setReducerClass(WordCountReducer.class);
  wordCountJob.setMapOutputKeyClass(Text.class);
  wordCountJob.setMapOutputValueClass(LongWritable.class);
  wordCountJob.setOutputKeyClass(Text.class);
  wordCountJob.setOutputValueClass(LongWritable.class);
  wordCountJob.setInputFormatClass(TextInputFormat.class);
  wordCountJob.setOutputFormatClass(TextOutputFormat.class);
  Path inputPath = new Path(args[0]);
  Path outputPath = new Path(args[1]);
  FileInputFormat.addInputPath(wordCountJob, inputPath);
  FileOutputFormat.setOutputPath(wordCountJob, outputPath);
  FileSystem fs = outputPath.getFileSystem(conf);
  fs.delete(outputPath, true);
  int result = wordCountJob.waitForCompletion(true) ? 0 : -1;
  return result;
 }
public static void main(String[] args)throws Exception {
 Configuration conf=new Configuration();
 int status=ToolRunner.run(conf, new WordCountDriver(), args);
 System.out.println(status);
}
}
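Because the reducer only sums LongWritable counts, and addition is associative, the same class can optionally be registered as a combiner to shrink the data shuffled between map and reduce. A one-line sketch that could be added to run() after setReducerClass(), using the wordCountJob variable from that method:

  // optional: partial sums on the map side do not change the final word counts
  wordCountJob.setCombinerClass(WordCountReducer.class);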
Input file is:
linux java hadoop dba123 sravan gvaspi
unix sqoop123 sql
mysql hibernet string class
abstract interface implements
implements inheritance java c++
linux java hadoop dba
unix sqoop123 sql
mysql hibernet string class
abstract interface implements
implements inheritance java c++
linux java hadoop dba
unix sqoop123 sql
mysql hibernet string class
abstract interface implements
implements inheritance java c++
linux java hadoop dba
unix sqoop123 sql
mysql hibernet string class
abstract interface implements
implements inheritance java c++
linux java hadoop dba
unix sqoop123 sql
mysql hibernet string class
abstract interface implements
implements inheritance java c++
output:
abstract 5
c++ 5
class 5
dba 4
dba123 1
gvaspi 1
hadoop 5
hibernet 5
implements 10
inheritance 5
interface 5
java 10
linux 5
mysql 5
sql 5
sqoop123 5
sravan 1
string 5
unix 5

MapReduce grep program in Hadoop

Project structure of the Hadoop grep program:

GrepMapper.java
package com.javatechnical.hadoop.grep;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class GrepMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

 @Override
 protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
  
  
 String line = value.toString();
  
 if(line.contains("java"))
 {
  context.write(new Text(line), NullWritable.get());
 }
  
  
 }
}
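The search word "java" is hard-coded in the mapper. A minimal sketch of a configurable variant that reads the word from the job Configuration in setup(); the property name grep.word is only an illustration and is not something the driver below sets:

public class ConfigurableGrepMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

 private String searchWord;

 @Override
 protected void setup(Context context) throws IOException, InterruptedException {
  // hypothetical property name; falls back to "java" if it is not set
  searchWord = context.getConfiguration().get("grep.word", "java");
 }

 @Override
 protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
  String line = value.toString();
  if (line.contains(searchWord)) {
   context.write(new Text(line), NullWritable.get());
  }
 }
}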
GrepDriver.java
package com.javatechnical.hadoop.grep;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class GrepDriver implements Tool{
 
 Configuration conf;

 @Override
 public Configuration getConf() {
  // TODO Auto-generated method stub
  return conf;
 }

 @Override
 public void setConf(Configuration conf) {
  // TODO Auto-generated method stub
  this.conf=conf;
 }

 @Override
 public int run(String[] args) throws Exception {
  
  Job grepJob = new Job(conf);
  
  grepJob.setJobName("Grep Test");
  
  grepJob.setJarByClass(this.getClass());
  
  grepJob.setMapperClass(GrepMapper.class);
  
  // map-only job: no reducers are needed for a simple grep
  grepJob.setNumReduceTasks(0);
  
  grepJob.setMapOutputKeyClass(Text.class);
  
  grepJob.setMapOutputValueClass(NullWritable.class);
  
  grepJob.setInputFormatClass(TextInputFormat.class);
  
  grepJob.setOutputFormatClass(TextOutputFormat.class);
  
  Path inputPath = new Path(args[0]);
  
  Path outputPath = new Path(args[1]);
  
  FileInputFormat.addInputPath(grepJob, inputPath);
  
  FileOutputFormat.setOutputPath(grepJob, outputPath);
  
  FileSystem fileSystem = outputPath.getFileSystem(conf);
  
  fileSystem.delete(outputPath, true);
  
  int result = grepJob.waitForCompletion(true) ? 0 : -1;
  
  return result;
 }
 
 public static void main(String[] args) throws Exception {
  
  int status = ToolRunner.run(new Configuration(), new GrepDriver(), args);
  
  System.out.println("Status : "+status);
  
 }

}
Input file is:
linux java hadoop dba123 sravan gvaspi
unix sqoop123 sql
mysql hibernet string class
abstract interface implements
implements inheritance java c++
linux java hadoop dba
unix sqoop123 sql
mysql hibernet string class
abstract interface implements
implements inheritance java c++
linux java hadoop dba
unix sqoop123 sql
mysql hibernet string class
abstract interface implements
implements inheritance java c++
linux java hadoop dba
unix sqoop123 sql
mysql hibernet string class
abstract interface implements
implements inheritance java c++
linux java hadoop dba
unix sqoop123 sql
mysql hibernet string class
abstract interface implements
implements inheritance java c++
Output:
linux java hadoop dba123 sravan gvaspi
implements inheritance java c++
linux java hadoop dba
implements inheritance java c++
linux java hadoop dba
implements inheritance java c++
linux java hadoop dba
implements inheritance java c++
linux java hadoop dba
implements inheritance java c++

MapReduce program for finding the department-wise employee gender count in Hadoop

Project structure for the Department Gender Count program

DeptMFCountMapper.java
package com.javatechnical.deptgender;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class DeptMFCountMapper extends Mapper<LongWritable, Text, Text, Text> {

 @Override
 protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

  
  String record = value.toString();
  
  String[] fields = record.split(":"); 
 
  
  context.write(new Text(fields[4]), new Text(fields[2]));
  
  
 }

}
DeptMFCountReducer.java
package com.javatechnical.deptgender;


import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class DeptMFCountReducer extends Reducer<Text, Text, Text, Text> {

 @Override
 protected void reduce(Text key, Iterable<Text> values, Context context)
   throws IOException, InterruptedException {
  
 int mc=0;
 
 int fc = 0;
  
  for (Text value : values)
  {
   
   String gender = value.toString();
  
   if(gender.equals("male"))
   {
    mc++;
   }
   else
   {
    fc++;
   }
   
  }
  
  String str = "Male Count = "+mc+"   Female Count = "+fc;
  
  context.write(key, new Text(str));
  
 }

}
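The else branch above counts every value that is not exactly "male" as female, so a typo or different casing in the gender field silently inflates the female count. A hedged sketch of a stricter check for the same loop body:

   String gender = value.toString().trim();

   // case-insensitive match; records that are neither "male" nor "female" are skipped
   if (gender.equalsIgnoreCase("male")) {
    mc++;
   } else if (gender.equalsIgnoreCase("female")) {
    fc++;
   }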
DeptMFCountDriver.java
package com.javatechnical.deptgender;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;


public class DeptMFCountDriver implements Tool {
 
 Configuration  conf;

 @Override
 public Configuration getConf() {
  // TODO Auto-generated method stub
  return conf;
 }

 @Override
 public void setConf(Configuration conf) {
  this.conf=conf;
  
 }

 @Override
 public int run(String[] args) throws Exception {
  
  Job genderCountJob = new Job(conf);
  
  genderCountJob.setJobName("Dept MF Count Test");
  
  genderCountJob.setJarByClass(this.getClass());
  
  genderCountJob.setMapperClass(DeptMFCountMapper.class);
  
  genderCountJob.setReducerClass(DeptMFCountReducer.class);
  
  genderCountJob.setMapOutputKeyClass(Text.class);
  
  genderCountJob.setMapOutputValueClass(Text.class);
  
  genderCountJob.setOutputKeyClass(Text.class);
  
  genderCountJob.setOutputValueClass(Text.class);
  
  genderCountJob.setInputFormatClass(TextInputFormat.class);
  
  genderCountJob.setOutputFormatClass(TextOutputFormat.class);
  
  Path inputPath = new Path(args[0]);
  
  Path outputPath = new Path(args[1]);
  
  
  FileInputFormat.addInputPath(genderCountJob, inputPath);
  
  FileOutputFormat.setOutputPath(genderCountJob, outputPath);
  
  FileSystem fs = outputPath.getFileSystem(conf);
  
  fs.delete(outputPath, true);
  
  
  int result = genderCountJob.waitForCompletion(true)?0:-1;
  
  
  return result;
 }
 
 public static void main(String[] args) throws Exception {
  
  Configuration  conf = new Configuration();
  
  int status = ToolRunner.run(conf, new DeptMFCountDriver(), args);
 
  System.out.println("Dept Wise Male Female Count Status "+status);
  
 }

}
output:
female 30
male 70

MapReduce program using Excel data in Hadoop

Project structure for the Excel input format program in Hadoop

ExcelMapper.java
package com.javatechnical.hadoop.excel;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ExcelMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
	

@Override
protected void map(LongWritable key, Text value, Context context) 
throws IOException, InterruptedException {
	
	
	String record = value.toString();
	
	String[] fields = record.split(",");
	
	context.write(new Text(fields[2]), new LongWritable(1) );
	
	}
}
ExcelParser.java
package com.javatechnical.hadoop.excel;

import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;

import org.apache.commons.logging.Log;

import org.apache.commons.logging.LogFactory;
//import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
//import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFRow;
//import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.xssf.usermodel.XSSFCell;

	public class ExcelParser 
	{

		private static final Log LOG = LogFactory.getLog(ExcelParser.class);
		
		private StringBuilder currentString = null;
		private long bytesRead = 0;

		public String parseExcelData(InputStream is) {
			
			try {
				//to get reference of Work book
				XSSFWorkbook workbook = new XSSFWorkbook(is);
				
				XSSFSheet sheet = workbook.getSheetAt(0);
							
				// 0 ---> for the first sheet
				// 1 ----> for the second sheet
				// 2 ---> for the third sheet
						

				// Iterate through each rows from first sheet
				Iterator rowIterator = sheet.rowIterator();
				
				currentString = new StringBuilder();
				
				
				XSSFRow row;
				XSSFCell cell;
								
				while (rowIterator.hasNext()) 
				{
					 row = (XSSFRow)rowIterator.next();
					
					// For each row, iterate through each columns
					@SuppressWarnings("rawtypes")
					Iterator  cellIterator = row.cellIterator();
					
					while (cellIterator.hasNext()) 
					{
						
						
						 cell = (XSSFCell)cellIterator.next();

						switch (cell.getCellType()) 
						{
						
						case XSSFCell.CELL_TYPE_BOOLEAN:
							bytesRead++;
							currentString.append(cell.getBooleanCellValue() + ",");
							break;

						case XSSFCell.CELL_TYPE_NUMERIC:
							bytesRead++;
							currentString.append(cell.getNumericCellValue() + ",");
							break;

						case XSSFCell.CELL_TYPE_STRING:
							bytesRead++;
							currentString.append(cell.getStringCellValue() + ",");
							break;

						}
								
						
					}
					
					currentString.setLength(currentString.length() - 1);
					currentString.append("\n");
				}
				
				is.close();
			} catch (IOException e) {
				LOG.error("IO Exception : File not found " + e);
			}
			//System.out.println("String builder : "+currentString.length()+"vaue  "+currentString);
			String temp = currentString.toString();
			return temp;
			
			
		}
	
		public long getBytesRead() {
			return bytesRead;
		}

}
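The XSSFCell.CELL_TYPE_* int constants used above are deprecated in recent Apache POI releases and removed in POI 4.x, where getCellType() returns the CellType enum instead. Under that assumption (and with an extra import of org.apache.poi.ss.usermodel.CellType), the switch would look like this sketch:

		// sketch for POI 4.x, where getCellType() returns the CellType enum
		switch (cell.getCellType())
		{
		case BOOLEAN:
			bytesRead++;
			currentString.append(cell.getBooleanCellValue() + ",");
			break;

		case NUMERIC:
			bytesRead++;
			currentString.append(cell.getNumericCellValue() + ",");
			break;

		case STRING:
			bytesRead++;
			currentString.append(cell.getStringCellValue() + ",");
			break;

		default:
			break;
		}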
ExcelRecordReader.java
package com.javatechnical.hadoop.excel;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class ExcelRecordReader extends RecordReader<LongWritable, Text> {
	
	
	String [] records;
	
	LongWritable key=new LongWritable() ;
	
	Text value = null;
	
	
	@Override
	public void initialize(InputSplit inputSplit, TaskAttemptContext context) 
throws IOException, InterruptedException {
		// TODO Auto-generated method stub
				
		FileSplit fileSplit = (FileSplit)inputSplit;
		
		Path path = fileSplit.getPath();
		
		Configuration conf = context.getConfiguration();
		
		FileSystem fs = path.getFileSystem(conf);
		
		FSDataInputStream input = fs.open(path);
		
		
		ExcelParser  parser = new ExcelParser();
		
		String excelData = parser.parseExcelData(input);	
		
		 records = excelData.split("\n");		
		
	}
	
	
	
	@Override
	public boolean nextKeyValue() throws IOException, InterruptedException {
		
		// Reconstructed continuation (the original listing is truncated here):
		// follows the standard RecordReader contract, using the key as the
		// index of the next record to emit.
		if (records.length == 0) {
			return false;
		}
		
		if (key.get() < records.length) {
			value = new Text(records[(int) key.get()]);
			key.set(key.get() + 1);
			return true;
		}
		
		return false;
	}
	
	@Override
	public LongWritable getCurrentKey() throws IOException, InterruptedException {
		return key;
	}
	
	@Override
	public Text getCurrentValue() throws IOException, InterruptedException {
		return value;
	}
	
	@Override
	public float getProgress() throws IOException, InterruptedException {
		return records.length == 0 ? 1.0f : (float) key.get() / records.length;
	}
	
	@Override
	public void close() throws IOException {
		// the Excel input stream was fully consumed and closed in initialize()
	}
}
ExcelReducer.java
package com.javatechnical.hadoop.excel;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class ExcelReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
	
	@Override
	protected void reduce(Text key, Iterable<LongWritable> values, Context context)
 throws IOException, InterruptedException {
		
		
		long sum = 0;
		
		for (LongWritable value : values) {
			
			sum = sum+value.get();
			
		}
		
		context.write(key, new LongWritable(sum));
		
		
	}

}
ExcelInputFormat.java
package com.javatechnical.hadoop.excel;
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class ExcelInputFormat extends FileInputFormat<LongWritable, Text> {

	@Override
	public RecordReader<LongWritable, Text> createRecordReader(InputSplit inputsplit, TaskAttemptContext context)
			throws IOException, InterruptedException {
					
		return new ExcelRecordReader();
	}

}
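ExcelRecordReader parses the whole workbook in initialize(), so an .xlsx file must not be split across several mappers, yet FileInputFormat splits large files by default. A small sketch of an override that could be added to ExcelInputFormat (it needs imports for org.apache.hadoop.mapreduce.JobContext and org.apache.hadoop.fs.Path):

	// an .xlsx workbook cannot be parsed from an arbitrary byte offset,
	// so keep each input file in a single split
	@Override
	protected boolean isSplitable(JobContext context, Path filename) {
		return false;
	}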
ExcelDriver.java
package com.javatechnical.hadoop.excel;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;




public class ExcelDriver implements Tool{
	
	Configuration conf;

	@Override
	public Configuration getConf() {
		// TODO Auto-generated method stub
		return conf;
	}

	@Override
	public void setConf(Configuration conf) {
		// TODO Auto-generated method stub
		this.conf=conf;
	}

	@Override
	public int run(String[] args) throws Exception {
		
		Job genderCountJob = new Job(conf);
		
		genderCountJob.setJobName("excel data");
		
		genderCountJob.setJarByClass(this.getClass());
		
		genderCountJob.setMapperClass(ExcelMapper.class);
		
		genderCountJob.setReducerClass(ExcelReducer.class);
		
		genderCountJob.setMapOutputKeyClass(Text.class);
		
		genderCountJob.setMapOutputValueClass(LongWritable.class);
		
		genderCountJob.setOutputKeyClass(Text.class);
		
		genderCountJob.setOutputValueClass(LongWritable.class);
		
		genderCountJob.setInputFormatClass(ExcelInputFormat.class);
		
		genderCountJob.setOutputFormatClass(TextOutputFormat.class);
		
		Path inputPath = new Path(args[0]);
		
		Path outputPath = new Path(args[1]);
		
		FileInputFormat.addInputPath(genderCountJob, inputPath);
		
		FileOutputFormat.setOutputPath(genderCountJob, outputPath);
		
		FileSystem fileSystem = outputPath.getFileSystem(conf);
		
		fileSystem.delete(outputPath, true);
		
		int result = genderCountJob.waitForCompletion(true)?0:-1;	
		
		return result;
	}
	
	public static void main(String[] args) throws Exception {
		
		int status = ToolRunner.run(new Configuration(), new ExcelDriver(), args);
		
		System.out.println("Status : "+status);
		
	}
	
}
	
output:
Issue Status	1
closed	11
fixed	18
open	666

MapReduce program for generating an employee report in Hadoop

Project structure for the Employee Report generator MapReduce program



EmpReportMapper.java
package com.javatechnical.empReport;

import java.io.IOException;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class EmpReportMapper extends Mapper<LongWritable, Text, Text, DoubleWritable> {
 
 @Override
 protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
  
  String record = value.toString(); 
  
  String[] fields = record.split(":");
  
  double salary = Double.parseDouble(fields[3]);
  
  context.write(new Text(fields[4]), new DoubleWritable(salary));
  
 }

}
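The mapper assumes every record has at least five ':'-separated fields and a numeric salary in fields[3]; a single malformed line would fail the whole task with an exception. A hedged, more defensive version of the same map() body:

  String[] fields = record.split(":");

  // skip records that are too short or carry a non-numeric salary
  if (fields.length < 5) {
   return;
  }

  try {
   double salary = Double.parseDouble(fields[3]);
   context.write(new Text(fields[4]), new DoubleWritable(salary));
  } catch (NumberFormatException e) {
   // malformed salary value; ignore this record
  }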
EmpReportReducer.java
package com.javatechnical.empReport;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class EmpReportReducer extends Reducer<Text, DoubleWritable, Text, NullWritable> {
 

 @Override
 protected void reduce(Text deptName, Iterable<DoubleWritable> salaries, Context context)
   throws IOException, InterruptedException {

  long empCount = 0;

  Iterator<DoubleWritable> iterator = salaries.iterator();

  DoubleWritable temp = iterator.next();

  double firstSalary = temp.get();

  double minSalary = firstSalary;
  double maxSalary = firstSalary;
  double sum = firstSalary;

  empCount++;

  while (iterator.hasNext()) {
   temp = iterator.next();

   double salary = temp.get();

   if (salary > maxSalary) {
    maxSalary = salary;
   }

   if (salary < minSalary) {
    minSalary = salary;
   }

   empCount++;

   sum = sum + salary;

  }
  
  double avg = sum / empCount;
  
  
  String output = deptName+"\t"+empCount+"\t"+maxSalary+"\t"+minSalary+"\t"+avg+"\t"+sum;
  
  context.write(new Text(output), NullWritable.get());
  
 
 }

}
EmpReportDriver.java
package com.javatechnical.empReport;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class EmpReportDriver implements Tool{

 Configuration conf;
 @Override
 public Configuration getConf() {
  // TODO Auto-generated method stub
  return conf;
 }

 @Override
 public void setConf(Configuration conf) {
  // TODO Auto-generated method stub
  this.conf=conf;
 }

 @Override
 public int run(String[] args) throws Exception {
  
  Job empReportJob = new Job(getConf());
  
  empReportJob.setJobName("Emp Report Group ");
  
  empReportJob.setJarByClass(this.getClass());  
  
  empReportJob.setMapperClass(EmpReportMapper.class);
  
  empReportJob.setReducerClass(EmpReportReducer.class);
  
  empReportJob.setMapOutputKeyClass(Text.class);
  
  empReportJob.setMapOutputValueClass(DoubleWritable.class);
  
  empReportJob.setOutputKeyClass(Text.class);
  
  empReportJob.setOutputValueClass(NullWritable.class);
  
  empReportJob.setInputFormatClass(TextInputFormat.class);
  
  empReportJob.setOutputFormatClass(TextOutputFormat.class);
  
  Path inputPath = new Path(args[0]); 
  Path outputPath = new Path(args[1]); 
  
  FileInputFormat.addInputPath(empReportJob, inputPath);
  
  FileOutputFormat.setOutputPath(empReportJob, outputPath);
  
  FileSystem fs = outputPath.getFileSystem(conf);
  
  fs.delete(outputPath, true);
   
  int result = empReportJob.waitForCompletion(true)?0:-1;  
  
  return result;
 }
 
 public static void main(String[] args) throws Exception {
  
  Configuration conf = new Configuration();
  
  int status = ToolRunner.run(conf, new EmpReportDriver(), args);
  
  System.out.println("Emp report status : " + status);
  
  
 }

}
output (department, employee count, max salary, min salary, average salary, total salary):
admin 20 24000.0 23000.0 23500.0 470000.0
dev 40 125000.0 23000.0 49250.0 1970000.0
finance 20 35000.0 22000.0 28500.0 570000.0
testing 20 26000.0 24000.0 25000.0 500000.0