package com.libc;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class Process {

    public static class TokenizerMapper extends Mapper
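The listing above is cut off at the mapper declaration. Purely as a hypothetical sketch of what a regex-driven mapper of this shape usually looks like (the generic types, the LOG_PATTERN expression, and the emitted key/value below are assumptions for illustration, not the original code):

    // Hypothetical sketch only -- types and pattern are assumed, not taken from the original source.
    public static class TokenizerMapper extends Mapper<Object, Text, Text, Text> {
        // Assumed pattern: pull an 8-digit date such as "20140801" out of each log line.
        private static final Pattern LOG_PATTERN = Pattern.compile("(\\d{8})");
        private final Text outKey = new Text();
        private final Text outValue = new Text();

        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            Matcher m = LOG_PATTERN.matcher(value.toString());
            if (m.find()) {
                outKey.set(m.group(1));   // e.g. the date
                outValue.set(value);      // pass the raw line through to the reducer
                context.write(outKey, outValue);
            }
        }
    }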
This was the first version. After it had run for a few days, the volume of server logs surged and the job started failing with JVM memory overflow errors, so it was revised into a second version in which the JVM memory can be configured explicitly. There are two ways to do this.
Option 1: pass the child JVM heap size on the command line via -D (picked up by GenericOptionsParser):
/opt/aimcpro/mapred/bin/hadoop jar libc_process.jar com.libc.Process -D mapred.child.java.opts=-Xmx2048m hdfs://mycluster/libc/input hdfs://mycluster/libc/output
Option 2: set it in the job configuration:
Configuration cc = job.getConfiguration();
cc.set("mapred.child.java.opts", "-Xmx2048m");   // raise the heap of the child (map/reduce task) JVMs
String mem = cc.get("mapred.child.java.opts");
System.out.println(mem);                         // print it back to confirm the setting took effect
That is, change the setting directly in the code (see the driver sketch below). After the child JVM heap was raised from 1 GB to 2 GB, the job ran through successfully.
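Putting the two options together, a driver along the following lines would work. This is only a sketch under the assumption of the standard new-API job setup; the job name, reducer wiring, and other details are placeholders rather than the original code:

    // Minimal driver sketch -- names and omitted wiring are illustrative assumptions.
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // GenericOptionsParser consumes -D mapred.child.java.opts=-Xmx2048m from the command line (option 1).
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

        // Option 2: hard-code the child JVM heap in the driver
        // (this overrides any -D value passed on the command line above).
        conf.set("mapred.child.java.opts", "-Xmx2048m");

        Job job = Job.getInstance(conf, "libc process");
        job.setJarByClass(Process.class);
        job.setMapperClass(TokenizerMapper.class);
        // job.setReducerClass(...);  // reducer wiring omitted

        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        // Verify what the job will actually run with.
        System.out.println(job.getConfiguration().get("mapred.child.java.opts"));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }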
Meanwhile the data volume keeps growing (date / daily record count):
20140801 6058177
20140802 7490572
20140803 8114244
20140804 7278280
20140805 7673678
20140806 8213066
20140807 9192677
20140808 9362143
20140809 10989437
20140810 11396093
20140811 10229799
20140812 10346527
20140813 10064709
20140814 11017971
20140815 11634611
20140818 10422815
20140819 12874181
20140820 13478590
20140821 12530974
20140822 11590312
20140823 15705258