<rt id="bn8ez"></rt>
<label id="bn8ez"></label>

  • <span id="bn8ez"></span>

    <label id="bn8ez"><meter id="bn8ez"></meter></label>

    paulwong

    WordCount的一個變種版本…Hadoop

    統計域名(實際是host)的計數器。

    輸入:一個文件夾中有一堆的文本文件,內容是一行一個的url,可以想像為數據庫中的一條記錄
    流程:提取url的domain,對domain計數+1
    輸出:域名,域名計數

    代碼如下:
    Mapper
    package com.keseek.hadoop;

    import java.io.IOException;
    import java.net.URI;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reporter;
    import org.apache.hadoop.mapred.Mapper;

    public class DomainCountMapper implements
            Mapper
    <LongWritable, Text, Text, LongWritable> {

        @Override
       
    public void configure(JobConf arg0) {
           
    // Init Text and LongWritable
            domain = new Text();
            one
    = new LongWritable(1);
        }


        @Override
       
    public void close() throws IOException {
           
    // TODO Auto-generated method stub
        }


        @Override
       
    public void map(LongWritable key, Text value,
                OutputCollector
    <Text, LongWritable> output, Reporter reporter)
               
    throws IOException {
           
    // Get URL
            String url = value.toString().trim();

           
    // URL->Domain && Collect
            domain.set(ParseDomain(url));
           
    if (domain.getLength() != 0) {
                output.collect(domain, one);
            }


        }


       
    public String ParseDomain(String url) {
           
    try {
                URI uri
    = URI.create(url);
               
    return uri.getHost();
            }
    catch (Exception e) {
               
    return "";
            }

        }


       
    // Shared used Text domain
        private Text domain;

       
    // One static
        private LongWritable one;

    }

    Reducer

    package com.keseek.hadoop;

    import java.io.IOException;
    import java.util.Iterator;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reporter;
    import org.apache.hadoop.mapred.Reducer;

    public class DomainCountReducer implements
            Reducer
    <Text, LongWritable, Text, LongWritable> {

        @Override
       
    public void configure(JobConf arg0) {
           
    // TODO Auto-generated method stub

        }


        @Override
       
    public void close() throws IOException {
           
    // TODO Auto-generated method stub

        }


        @Override
       
    public void reduce(Text key, Iterator<LongWritable> values,
                OutputCollector
    <Text, LongWritable> output, Reporter reporter)
               
    throws IOException {
           
    // Count the domain
            long cnt = 0;
           
    while (values.hasNext()) {
                cnt
    += values.next().get();
            }

           
    // Output
            output.collect(key, new LongWritable(cnt));
        }


    }

    Main

    package com.keseek.hadoop;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.FileInputFormat;
    import org.apache.hadoop.mapred.FileOutputFormat;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.RunningJob;
    import org.apache.hadoop.mapred.TextInputFormat;
    import org.apache.hadoop.mapred.TextOutputFormat;

    public class DomainCountMain {
       
    public static void main(String[] args) throws Exception {
           
    // Param for path
            if (args.length != 2) {
                System.out.println(
    "Usage:");
                System.out
                        .println(
    "DomainCountMain.jar  <Input_Path>  <Outpu_Path>");
                System.exit(
    -1);
            }


           
    // Configure JobConf
            JobConf jobconf = new JobConf(DomainCountMain.class);

            jobconf.setJobName(
    "Domain Counter by Coder4");

            FileInputFormat.setInputPaths(jobconf,
    new Path(args[0]));
           FileOutputFormat.setOutputPath(jobconf,
    new Path(args[1]));

            jobconf.setInputFormat(TextInputFormat.
    class);
           jobconf.setOutputFormat(TextOutputFormat.
    class);

            jobconf.setMapperClass(DomainCountMapper.
    class);
            jobconf.setReducerClass(DomainCountReducer.
    class);
           jobconf.setCombinerClass(DomainCountReducer.
    class);

            jobconf.setMapOutputKeyClass(Text.
    class);
            jobconf.setMapOutputValueClass(LongWritable.
    class);
            jobconf.setOutputKeyClass(Text.
    class);
            jobconf.setOutputValueClass(LongWritable.
    class);

           
    // Run job
            RunningJob run = JobClient.runJob(jobconf);
            run.waitForCompletion();
           
    if (run.isSuccessful()) {
                System.out.println(
    "<<<DomainCount Main>>> success.");
            }
    else {
                System.out.println(
    "<<<DomainCount Main>>> error.");
            }

        }

    }

    posted on 2012-09-08 15:30 paulwong 閱讀(266) 評論(0)  編輯  收藏 所屬分類: HADOOP云計算

    主站蜘蛛池模板: 久久久www成人免费毛片| 亚洲中文精品久久久久久不卡| 在线观看亚洲天天一三视| 免费v片在线观看无遮挡| 四虎影视永久免费观看| 免费看一级做a爰片久久| 又大又粗又爽a级毛片免费看| 免费a级毛片无码av| 亚洲а∨天堂久久精品| 免费在线观看a级毛片| 亚洲色欲久久久久综合网| 久久久青草青青国产亚洲免观| 亚洲色一色噜一噜噜噜| 亚洲精品国精品久久99热一| 亚洲AV无码一区二区三区DV| 亚洲精品私拍国产福利在线| 久久久亚洲欧洲日产国码aⅴ| 亚洲网红精品大秀在线观看| 亚洲国产精品线观看不卡| 久久亚洲最大成人网4438| 亚洲人av高清无码| 男女男精品网站免费观看 | 亚洲精品人成无码中文毛片| 中文字幕亚洲一区| 亚洲高清在线观看| 亚洲国产精品线观看不卡| 亚洲AV性色在线观看| 特级做a爰片毛片免费看| av永久免费网站在线观看| 91精品成人免费国产片| 黄网址在线永久免费观看| 亚洲?v女人的天堂在线观看| 国产亚洲精品美女久久久| 亚洲精品无码久久久久久久 | 亚洲电影国产一区| 亚洲av片不卡无码久久| 亚洲AV无码专区在线观看成人| 一个人免费观看视频在线中文| 无码囯产精品一区二区免费| 久久精品免费全国观看国产| 免费中文字幕一级毛片|