Posted on 2013-01-08 15:08
沙漠中的魚 閱讀(1251)
評論(0) 編輯 收藏 所屬分類:
其他 、
Java 、
數據挖掘
/**
 * Command-line demo that clusters a fixed sample document set twice -- once by
 * topic using {@code LingoClusteringAlgorithm} and once using
 * {@code ByUrlClusteringAlgorithm} -- and prints every resulting cluster label
 * followed by the titles of the documents in that cluster.
 */
public class ClusteringFlyStoneDocument {

    /**
     * Runs both clustering passes over {@code SampleDocumentData.DOCUMENTS_DATA_MINING}
     * and writes the results to standard output.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        final Controller controller = ControllerFactory.createCachingPooling(IDocumentSource.class);
        try {
            // Attribute map handed to the controller for the topic-clustering run.
            final Map<String, Object> attributes = Maps.newHashMap();
            // The document collection to be clustered.
            final List<Document> documents = SampleDocumentData.DOCUMENTS_DATA_MINING;
            // Register the documents in the clustering attributes.
            CommonAttributesDescriptor.attributeBuilder(attributes).documents(documents);
            // Select the default processing language (Simplified Chinese).
            MultilingualClusteringDescriptor.attributeBuilder(attributes)
                    .defaultLanguage(LanguageCode.CHINESE_SIMPLIFIED);

            // First pass: cluster with the Lingo algorithm using the attributes above.
            final ProcessingResult topicResult =
                    controller.process(attributes, LingoClusteringAlgorithm.class);
            printClusters("=======聚類主題=====", "【主題 】", "--------", topicResult.getClusters());

            // Second pass: cluster the same documents by URL; no query is supplied (null).
            final ProcessingResult byDomainClusters =
                    controller.process(documents, null, ByUrlClusteringAlgorithm.class);
            printClusters("=======URL聚類=======", "【URL】", "----", byDomainClusters.getClusters());
        } finally {
            // Shut the controller down even when processing throws, so the
            // pooling/caching controller does not outlive the demo.
            // NOTE(review): assumes Controller.dispose() is the shutdown call in the
            // library version in use -- confirm against its API documentation.
            controller.dispose();
        }
    }

    /**
     * Prints a heading line, then each cluster's label and the titles of all
     * documents returned by {@code getAllDocuments()} for that cluster.
     *
     * @param heading     line printed once before the clusters
     * @param labelPrefix text printed immediately before each cluster label
     * @param titlePrefix text printed immediately before each document title
     * @param clusters    clusters to print
     */
    private static void printClusters(String heading, String labelPrefix, String titlePrefix,
            List<Cluster> clusters) {
        System.out.println(heading);
        for (Cluster cluster : clusters) {
            System.out.println(labelPrefix + cluster.getLabel());
            for (Document doc : cluster.getAllDocuments()) {
                System.out.println(titlePrefix + doc.getTitle());
            }
        }
    }
}