Solr Search Engine for Desktop
Finding a file on a Windows machine can take a good amount of time. Here is a simple system that indexes the desktop files in Solr, a popular open source search engine, so that they can be searched in seconds.
Solr is an open source search engine from Apache: https://lucene.apache.org/solr/.
Step 1: Download Solr and run it in cloud mode. Here is the guide to getting started with Solr: https://lucene.apache.org/solr/guide/7_7/solr-tutorial.html#solr-tutorial
Once you have downloaded and extracted Solr into a folder, start it with the following command:
>./bin/solr start -c -p 8983 -s ../example/cloud/node1/solr
This starts the first node, which may take approximately 30 seconds. It also starts ZooKeeper (on port 9983) to manage the nodes. Once the first node is up, use the following command to start the second node:
>./bin/solr start -c -p 7574 -s ../example/cloud/node2/solr -z localhost:9983
This starts Solr as a two-node cluster, which can be accessed at http://localhost:8983/solr
Step 2: Create a collection for indexing files
>solr create -c latest -s 2 -rf 2
Step 3: Write a program to extract the file names and index them in the Solr search engine.
package com.kvn.web.solrClients;
import java.io.File;
import java.io.IOException;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
/**
 * Recursively walks a fixed set of base folders, selects files whose names end
 * with one of the configured extensions, and sends one {@code FileDoc} per
 * matching file (name, absolute path, last-modified timestamp) to the Solr
 * collection addressed by {@link #baseUrl}.
 */
public class ListFilesOfType {

    /** Root folders that {@link #main(String[])} crawls. */
    static String[] baseFoldersToIndex = { "C:/docs", "C:/Java Majestic", "C:/JavaMajestic",
            "C:/Users/prabhukvn/Documents", "c:/Users/prabhukvn/Downloads", "C:/POCS", "C:/softwares" };

    /** Matches regular files (as opposed to directories). */
    Predicate<File> isFile = File::isFile;

    /** URL of the Solr collection that receives the documents. */
    String baseUrl = "http://localhost:8983/solr/latest";

    /** Lazily created by {@link #connect()}; stays {@code null} until the first batch is sent. */
    HttpSolrClient client = null;

    /** File-name suffixes (including the leading dot) that qualify a file for indexing. */
    static List<String> fileTypes = new ArrayList<>(Arrays.asList(".pdf", ".doc", ".docx"));

    /** Mutable copy of {@link #baseFoldersToIndex}; this is the list {@code main} iterates. */
    static List<String> filePaths = new ArrayList<>(Arrays.asList(baseFoldersToIndex));

    /**
     * Crawls every configured base folder, indexes the matching files and prints
     * the elapsed wall-clock time in milliseconds.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        ListFilesOfType obj = new ListFilesOfType();
        long startTime = System.currentTimeMillis();
        try {
            filePaths.stream().map(File::new).forEach(obj::processFile);
        } finally {
            // Release the HTTP connection resources even if the crawl aborts.
            obj.closeClient();
        }
        System.out.println("Total Time:" + (System.currentTimeMillis() - startTime));
    }

    /**
     * Indexes the matching files directly inside {@code file} as one batch and
     * then recurses into each sub-directory.
     *
     * @param file directory to process; if it cannot be listed (not a
     *             directory, or an I/O error) the call does nothing
     */
    public void processFile(File file) {
        File[] listOfFiles = file.listFiles();
        if (listOfFiles == null) {
            return;
        }

        // Sequential on purpose: a directory listing is small and the work is I/O-bound.
        List<FileDoc> fileDocs = Arrays.stream(listOfFiles)
                .filter(isFile.and(this::filterFiles))
                .map(this::createFileDoc)
                .collect(Collectors.toList());
        if (!fileDocs.isEmpty()) {
            sendToSolr(fileDocs);
        }

        Arrays.stream(listOfFiles).filter(File::isDirectory).forEach(this::processFile);
    }

    /**
     * Prints the name of every document in the batch, adds the batch to Solr and
     * commits. A failure is reported on standard error and does not stop the crawl.
     *
     * @param fileDocs non-empty batch of documents from a single directory
     */
    private void sendToSolr(List<FileDoc> fileDocs) {
        try {
            fileDocs.forEach(doc -> System.out.println(doc.getName()));
            HttpSolrClient solrClient = this.connect();
            solrClient.addBeans(fileDocs);
            solrClient.commit();
        } catch (SolrServerException | IOException e) {
            // Best effort: report the failed batch and keep crawling the remaining folders.
            System.err.println("Failed to index " + fileDocs.size() + " file(s) in Solr at " + baseUrl);
            e.printStackTrace();
        }
    }

    /**
     * Builds the document for one file from its name, absolute path and
     * last-modified time.
     *
     * @param file file to describe
     * @return the new document
     */
    FileDoc createFileDoc(File file) {
        return new FileDoc(file.getName(), file.getAbsolutePath(), new Timestamp(file.lastModified()));
    }

    /**
     * Tells whether the file name ends with one of the configured extensions.
     * The comparison ignores case, so {@code REPORT.PDF} matches {@code .pdf}.
     *
     * @param f file whose name is checked
     * @return {@code true} if the name ends with any entry of {@link #fileTypes}
     */
    public boolean filterFiles(File f) {
        String name = f.getName();
        // regionMatches returns false when the offset is negative, i.e. when the
        // name is shorter than the extension.
        return fileTypes.stream().anyMatch(
                ext -> name.regionMatches(true, name.length() - ext.length(), ext, 0, ext.length()));
    }

    /**
     * Connect to solr using solr client. The client is created on first use and
     * reused afterwards.
     *
     * @return the shared client for {@link #baseUrl}
     */
    private HttpSolrClient connect() {
        if (client == null) {
            client = new HttpSolrClient.Builder(baseUrl).withConnectionTimeout(10000).withSocketTimeout(60000).build();
        }
        return client;
    }

    /** Closes the Solr client if one was created; a later {@link #connect()} creates a new one. */
    private void closeClient() {
        if (client == null) {
            return;
        }
        try {
            client.close();
        } catch (IOException e) {
            System.err.println("Failed to close Solr client for " + baseUrl);
            e.printStackTrace();
        } finally {
            client = null;
        }
    }
}
And the Maven dependency:
<dependency>
<groupId>org.apache.solr</groupId>
<artifactId>solr-solrj</artifactId>
<version>7.7.0</version>
</dependency>
Use http://localhost:8983/solr/#/latest/query to search the data (`latest` is the collection created in Step 2),
or http://localhost:8983/solr to open the dashboard.