Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Analyzer plugin framework #2660

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion dev/checkstyle/suppressions.xml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ Portions Copyright (c) 2018-2019, Chris Fraire <cfraire@me.com>.

<suppress checks="ParameterNumber" files="CtagsReader\.java|Definitions\.java|
|JFlexXrefUtils\.java|FileAnalyzerFactory\.java|SearchController\.java|
|Context\.java|HistoryContext\.java|Suggester\.java" />
|Context\.java|HistoryContext\.java|Suggester\.java|AnalyzersInfo\.java" />

<suppress checks="MethodLength" files="Indexer\.java" />
</suppressions>

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -40,35 +40,36 @@ public class AnalyzerGuruHelp {
* {@link AnalyzerGuru#getMagicsMap()}, and
* {@link AnalyzerGuru#getAnalyzerFactoryMatchers()}.
* @return a defined, multi-line String
* @param analyzerGuru an instance of analyzer guru with info about analyzers
*/
public static String getUsage() {
public static String getUsage(AnalyzerGuru analyzerGuru) {
StringBuilder b = new StringBuilder();
b.append("AnalyzerGuru prefixes:\n");
byKey(AnalyzerGuru.getPrefixesMap()).forEach((kv) -> {
byKey(analyzerGuru.getPrefixesMap()).forEach((kv) -> {
b.append(String.format("%-10s : %s\n", reportable(kv.key + '*'),
reportable(kv.fac)));
});

b.append("\nAnalyzerGuru extensions:\n");
byKey(AnalyzerGuru.getExtensionsMap()).forEach((kv) -> {
byKey(analyzerGuru.getExtensionsMap()).forEach((kv) -> {
b.append(String.format("*.%-7s : %s\n",
reportable(kv.key.toLowerCase(Locale.ROOT)),
reportable(kv.fac)));
});

b.append("\nAnalyzerGuru magic strings:\n");
byFactory(AnalyzerGuru.getMagicsMap()).forEach((kv) -> {
byFactory(analyzerGuru.getMagicsMap()).forEach((kv) -> {
b.append(String.format("%-23s : %s\n", reportable(kv.key),
reportable(kv.fac)));
});

b.append("\nAnalyzerGuru magic matchers:\n");
AnalyzerGuru.getAnalyzerFactoryMatchers().forEach((m) -> {
analyzerGuru.getAnalyzerFactoryMatchers().forEach((m) -> {
if (m.getIsPreciseMagic()) {
b.append(reportable(m));
}
});
AnalyzerGuru.getAnalyzerFactoryMatchers().forEach((m) -> {
analyzerGuru.getAnalyzerFactoryMatchers().forEach((m) -> {
if (!m.getIsPreciseMagic()) {
b.append(reportable(m));
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* See LICENSE.txt included in this distribution for the specific
* language governing permissions and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at LICENSE.txt.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/

/*
* Copyright (c) 2019 Oracle and/or its affiliates. All rights reserved.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing attributions?

*/
package org.opengrok.indexer.analysis;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;

/**
* A class wrapping information about used analyzers.
*/
public class AnalyzersInfo {

/**
* Descending string length comparator for magics
*/
private static final Comparator<String> DESCENDING_LENGTH_COMPARATOR =
Comparator.comparingInt(String::length).thenComparing(String::toString);

/**
* Modified when
* {@link AnalyzerFramework#addExtension(String, AnalyzerFactory)}
* or
* {@link AnalyzerFramework#addPrefix(String, AnalyzerFactory)}
* are called to augment the value in {@link AnalyzerGuru#getVersionNo()}.
*/
public final SortedSet<String> customizations;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please add getters? It just does't feel right this way.

Copy link
Contributor Author

@tulinkry tulinkry Feb 10, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I intentionally don’t want to use getters as this class does no provide any functionality so final properties are enough

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even simple DTO classes are always written with getters/setters.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’ve seen codes which don’t follow that. If this is code style for opengrok I can change it of course.


/**
* Map from file names to analyzer factories.
*/
public final Map<String, AnalyzerFactory> fileNames;

/**
* Map from file extensions to analyzer factories.
*/
public final Map<String, AnalyzerFactory> extensions;

/**
* Map from file prefixes to analyzer factories.
*/
public final Map<String, AnalyzerFactory> prefixes;

/**
* Map from magic strings to analyzer factories.
*/
public final SortedMap<String, AnalyzerFactory> magics;

/**
* List of matcher objects which can be used to determine which analyzer
* factory to use.
*/
public final List<FileAnalyzerFactory.Matcher> matchers;

/**
* List of all registered {@code FileAnalyzerFactory} instances.
*/
public final List<AnalyzerFactory> factories;

/**
* A map of {@link FileAnalyzer#getFileTypeName()} to human readable analyzer name.
*/
public final Map<String, String> fileTypeDescriptions;

/**
* Maps from {@link FileAnalyzer#getFileTypeName()} to
* {@link FileAnalyzerFactory}
*/
public final Map<String, AnalyzerFactory> filetypeFactories;

/**
* Maps from {@link FileAnalyzer#getFileTypeName()} to
* {@link FileAnalyzer#getVersionNo()}
*/
public final Map<String, Long> analyzerVersions;


/**
* Construct an empty analyzers info.
*/
public AnalyzersInfo() {
this(
new TreeSet<>(),
new HashMap<>(),
new HashMap<>(),
new HashMap<>(),
new TreeMap<>(DESCENDING_LENGTH_COMPARATOR),
new ArrayList<>(),
new ArrayList<>(),
new HashMap<>(),
new HashMap<>(),
new HashMap<>()
);
}

/**
* Construct the analyzers info with given collections.
*
* @param customizations the customization keys set
* @param fileNames map of filenames to analyzers factories
* @param extensions map of extensions to analyzers factories
* @param prefixes map of prefixes to analyzers factories
* @param magics map of magics to analyzers factories
* @param matchers a list of matchers for analyzers factories
* @param factories a list of analyzers factories
* @param fileTypeDescriptions map of analyzer names to analyzer descriptions
* @param filetypeFactories map of file type to analyzers factories
* @param analyzerVersions map of analyzer names to analyzer versions
*/
private AnalyzersInfo(
SortedSet<String> customizations,
Map<String, AnalyzerFactory> fileNames,
Map<String, AnalyzerFactory> extensions,
Map<String, AnalyzerFactory> prefixes,
SortedMap<String, AnalyzerFactory> magics,
List<FileAnalyzerFactory.Matcher> matchers,
List<AnalyzerFactory> factories,
Map<String, String> fileTypeDescriptions,
Map<String, AnalyzerFactory> filetypeFactories,
Map<String, Long> analyzerVersions
) {
this.customizations = customizations;
this.fileNames = fileNames;
this.extensions = extensions;
this.prefixes = prefixes;
this.magics = magics;
this.matchers = matchers;
this.factories = factories;
this.fileTypeDescriptions = fileTypeDescriptions;
this.filetypeFactories = filetypeFactories;
this.analyzerVersions = analyzerVersions;
}

/**
* Make the object unmodifiable.
*
* @return a new object reference with all properties wrapped as unmodifiable collections
*/
public AnalyzersInfo freeze() {
return new AnalyzersInfo(
Collections.unmodifiableSortedSet(this.customizations),
Collections.unmodifiableMap(this.fileNames),
Collections.unmodifiableMap(this.extensions),
Collections.unmodifiableMap(this.prefixes),
Collections.unmodifiableSortedMap(this.magics),
Collections.unmodifiableList(this.matchers),
Collections.unmodifiableList(this.factories),
Collections.unmodifiableMap(this.fileTypeDescriptions),
Collections.unmodifiableMap(this.filetypeFactories),
Collections.unmodifiableMap(this.analyzerVersions)
);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* See LICENSE.txt included in this distribution for the specific
* language governing permissions and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at LICENSE.txt.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/

/*
* Copyright (c) 2019 Oracle and/or its affiliates. All rights reserved.
*/
package org.opengrok.indexer.analysis;

public interface IAnalyzerPlugin {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please delete the I prefix? I know it is used in C# world but I don't see it that often in Java and I read in some books it should not be used because it shows unnecessary implementation details.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No doubt for that. The trouble here is that we’ve already exposed one interface for plugins with I so this is consistent with the current code.

We should make a change in a future to transfer both cases to the correct name but it’d be a breaking change and I think it shouldn’t be part of this pr.


AnalyzerFactory getFactory();
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.opengrok.indexer.analysis.AnalyzerGuru;
import org.opengrok.indexer.analysis.FileAnalyzer;
import org.opengrok.indexer.analysis.StreamSource;
import org.opengrok.indexer.configuration.RuntimeEnvironment;

/**
* Analyzes a BZip2 file Created on September 22, 2005
Expand Down Expand Up @@ -80,7 +81,7 @@ public void analyze(Document doc, StreamSource src, Writer xrefOut)
String newname = path.substring(0, path.lastIndexOf('.'));
//System.err.println("BZIPPED OF = " + newname);
try (InputStream in = bzSrc.getStream()) {
fa = AnalyzerGuru.getAnalyzer(in, newname);
fa = RuntimeEnvironment.getInstance().getAnalyzerGuru().getAnalyzer(in, newname);
}
if (!(fa instanceof BZip2Analyzer)) {
if (fa.getGenre() == Genre.PLAIN || fa.getGenre() == Genre.XREFABLE) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import org.opengrok.indexer.analysis.AnalyzerGuru;
import org.opengrok.indexer.analysis.FileAnalyzer;
import org.opengrok.indexer.analysis.StreamSource;
import org.opengrok.indexer.configuration.RuntimeEnvironment;
import org.opengrok.indexer.logger.LoggerFactory;

/**
Expand Down Expand Up @@ -85,7 +86,7 @@ public void analyze(Document doc, StreamSource src, Writer xrefOut)
String newname = path.substring(0, path.length() - 3);
//System.err.println("GZIPPED OF = " + newname);
try (InputStream gzis = gzSrc.getStream()) {
fa = AnalyzerGuru.getAnalyzer(gzis, newname);
fa = RuntimeEnvironment.getInstance().getAnalyzerGuru().getAnalyzer(gzis, newname);
}
if (fa == null) {
this.g = Genre.DATA;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.opengrok.indexer.analysis.AnalyzerFactory;
import org.opengrok.indexer.analysis.AnalyzerGuru;
import org.opengrok.indexer.analysis.FileAnalyzer;
import org.opengrok.indexer.analysis.OGKTextField;
import org.opengrok.indexer.analysis.StreamSource;
import org.opengrok.indexer.configuration.RuntimeEnvironment;
import org.opengrok.indexer.search.QueryBuilder;
import org.opengrok.indexer.web.Util;

Expand Down Expand Up @@ -82,7 +82,7 @@ public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOExc
fout.write(ename);
fout.write("\n");

AnalyzerFactory fac = AnalyzerGuru.find(ename);
AnalyzerFactory fac = RuntimeEnvironment.getInstance().getAnalyzerGuru().find(ename);
if (fac instanceof JavaClassAnalyzerFactory) {
if (xrefOut != null) {
xrefOut.append("<br/>");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ public synchronized void setWorking() {
/**
* Check if this plugin has failed during loading or is missing.
*
* This method has the same effect as !{@link isWorking()}.
* This method has the same effect as !{@link #isWorking()}.
*
* @return true if failed, true otherwise
* @see #isWorking()
Expand Down