Skip to content

Commit

Permalink
Bump the codec version to 10.0. (#13815)
Browse files Browse the repository at this point in the history
Bump the codec version to 10.0.

Lucene100Codec is the exact same file format as Lucene912Codec. This codec
dance just makes things slightly easier to reason about since our backward
compatibility guarantees are aligned with major version: once we drop support
for 9.x indices, we can remove all `Lucene9XXCodec`s.
  • Loading branch information
jpountz committed Oct 1, 2024
1 parent 15168ce commit 4105c6e
Show file tree
Hide file tree
Showing 18 changed files with 1,121 additions and 446 deletions.
4 changes: 3 additions & 1 deletion lucene/backward-codecs/src/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
exports org.apache.lucene.backward_codecs.lucene94;
exports org.apache.lucene.backward_codecs.lucene95;
exports org.apache.lucene.backward_codecs.lucene99;
exports org.apache.lucene.backward_codecs.lucene912;
exports org.apache.lucene.backward_codecs.packed;
exports org.apache.lucene.backward_codecs.store;

Expand All @@ -62,5 +63,6 @@
org.apache.lucene.backward_codecs.lucene92.Lucene92Codec,
org.apache.lucene.backward_codecs.lucene94.Lucene94Codec,
org.apache.lucene.backward_codecs.lucene95.Lucene95Codec,
org.apache.lucene.backward_codecs.lucene99.Lucene99Codec;
org.apache.lucene.backward_codecs.lucene99.Lucene99Codec,
org.apache.lucene.backward_codecs.lucene912.Lucene912Codec;
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene912;
package org.apache.lucene.backward_codecs.lucene912;

import java.util.Objects;
import org.apache.lucene.codecs.Codec;
Expand All @@ -37,6 +37,7 @@
import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat;
import org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat;
import org.apache.lucene.codecs.lucene94.Lucene94FieldInfosFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99SegmentInfoFormat;
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@ org.apache.lucene.backward_codecs.lucene92.Lucene92Codec
org.apache.lucene.backward_codecs.lucene94.Lucene94Codec
org.apache.lucene.backward_codecs.lucene95.Lucene95Codec
org.apache.lucene.backward_codecs.lucene99.Lucene99Codec
org.apache.lucene.backward_codecs.lucene912.Lucene912Codec
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.apache.lucene.codecs.lucene100.Lucene100Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.KnnByteVectorField;
Expand All @@ -42,7 +42,7 @@
public class TestHnswBitVectorsFormat extends BaseIndexFileFormatTestCase {
@Override
protected Codec getCodec() {
return new Lucene912Codec() {
return new Lucene100Codec() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return new HnswBitVectorsFormat();
Expand Down
5 changes: 3 additions & 2 deletions lucene/core/src/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* limitations under the License.
*/

import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.apache.lucene.codecs.lucene100.Lucene100Codec;

/** Lucene Core. */
@SuppressWarnings("module") // the test framework is compiled after the core...
Expand All @@ -34,6 +34,7 @@
exports org.apache.lucene.codecs.lucene95;
exports org.apache.lucene.codecs.lucene99;
exports org.apache.lucene.codecs.lucene912;
exports org.apache.lucene.codecs.lucene100;
exports org.apache.lucene.codecs.perfield;
exports org.apache.lucene.codecs;
exports org.apache.lucene.document;
Expand Down Expand Up @@ -72,7 +73,7 @@
provides org.apache.lucene.analysis.TokenizerFactory with
org.apache.lucene.analysis.standard.StandardTokenizerFactory;
provides org.apache.lucene.codecs.Codec with
Lucene912Codec;
Lucene100Codec;
provides org.apache.lucene.codecs.DocValuesFormat with
org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
provides org.apache.lucene.codecs.KnnVectorsFormat with
Expand Down
2 changes: 1 addition & 1 deletion lucene/core/src/java/org/apache/lucene/codecs/Codec.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ static NamedSPILoader<Codec> getLoader() {
return LOADER;
}

static Codec defaultCodec = LOADER.lookup("Lucene912");
static Codec defaultCodec = LOADER.lookup("Lucene100");
}

private final String name;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene100;

import java.util.Objects;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompoundFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90CompoundFormat;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90NormsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat;
import org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat;
import org.apache.lucene.codecs.lucene94.Lucene94FieldInfosFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99SegmentInfoFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;

/**
* Implements the Lucene 10.0 index format
*
* <p>If you want to reuse functionality of this codec in another codec, extend {@link FilterCodec}.
*
* @see org.apache.lucene.codecs.lucene100 package documentation for file format details.
* @lucene.experimental
*/
public class Lucene100Codec extends Codec {

/** Configuration option for the codec. */
public enum Mode {
/** Trade compression ratio for retrieval speed. */
BEST_SPEED(Lucene90StoredFieldsFormat.Mode.BEST_SPEED),
/** Trade retrieval speed for compression ratio. */
BEST_COMPRESSION(Lucene90StoredFieldsFormat.Mode.BEST_COMPRESSION);

private final Lucene90StoredFieldsFormat.Mode storedMode;

private Mode(Lucene90StoredFieldsFormat.Mode storedMode) {
this.storedMode = Objects.requireNonNull(storedMode);
}
}

private final TermVectorsFormat vectorsFormat = new Lucene90TermVectorsFormat();
private final FieldInfosFormat fieldInfosFormat = new Lucene94FieldInfosFormat();
private final SegmentInfoFormat segmentInfosFormat = new Lucene99SegmentInfoFormat();
private final LiveDocsFormat liveDocsFormat = new Lucene90LiveDocsFormat();
private final CompoundFormat compoundFormat = new Lucene90CompoundFormat();
private final NormsFormat normsFormat = new Lucene90NormsFormat();

private final PostingsFormat defaultPostingsFormat;
private final PostingsFormat postingsFormat =
new PerFieldPostingsFormat() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return Lucene100Codec.this.getPostingsFormatForField(field);
}
};

private final DocValuesFormat defaultDVFormat;
private final DocValuesFormat docValuesFormat =
new PerFieldDocValuesFormat() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
return Lucene100Codec.this.getDocValuesFormatForField(field);
}
};

private final KnnVectorsFormat defaultKnnVectorsFormat;
private final KnnVectorsFormat knnVectorsFormat =
new PerFieldKnnVectorsFormat() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return Lucene100Codec.this.getKnnVectorsFormatForField(field);
}
};

private final StoredFieldsFormat storedFieldsFormat;

/** Instantiates a new codec. */
public Lucene100Codec() {
this(Mode.BEST_SPEED);
}

/**
* Instantiates a new codec, specifying the stored fields compression mode to use.
*
* @param mode stored fields compression mode to use for newly flushed/merged segments.
*/
public Lucene100Codec(Mode mode) {
super("Lucene100");
this.storedFieldsFormat =
new Lucene90StoredFieldsFormat(Objects.requireNonNull(mode).storedMode);
this.defaultPostingsFormat = new Lucene912PostingsFormat();
this.defaultDVFormat = new Lucene90DocValuesFormat();
this.defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat();
}

@Override
public final StoredFieldsFormat storedFieldsFormat() {
return storedFieldsFormat;
}

@Override
public final TermVectorsFormat termVectorsFormat() {
return vectorsFormat;
}

@Override
public final PostingsFormat postingsFormat() {
return postingsFormat;
}

@Override
public final FieldInfosFormat fieldInfosFormat() {
return fieldInfosFormat;
}

@Override
public final SegmentInfoFormat segmentInfoFormat() {
return segmentInfosFormat;
}

@Override
public final LiveDocsFormat liveDocsFormat() {
return liveDocsFormat;
}

@Override
public final CompoundFormat compoundFormat() {
return compoundFormat;
}

@Override
public final PointsFormat pointsFormat() {
return new Lucene90PointsFormat();
}

@Override
public final KnnVectorsFormat knnVectorsFormat() {
return knnVectorsFormat;
}

/**
* Returns the postings format that should be used for writing new segments of <code>field</code>.
*
* <p>The default implementation always returns "Lucene912".
*
* <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
* future version of Lucene are only guaranteed to be able to read the default implementation,
*/
public PostingsFormat getPostingsFormatForField(String field) {
return defaultPostingsFormat;
}

/**
* Returns the docvalues format that should be used for writing new segments of <code>field</code>
* .
*
* <p>The default implementation always returns "Lucene90".
*
* <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
* future version of Lucene are only guaranteed to be able to read the default implementation.
*/
public DocValuesFormat getDocValuesFormatForField(String field) {
return defaultDVFormat;
}

/**
* Returns the vectors format that should be used for writing new segments of <code>field</code>
*
* <p>The default implementation always returns "Lucene99HnswVectorsFormat".
*
* <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
* future version of Lucene are only guaranteed to be able to read the default implementation.
*/
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return defaultKnnVectorsFormat;
}

@Override
public final DocValuesFormat docValuesFormat() {
return docValuesFormat;
}

@Override
public final NormsFormat normsFormat() {
return normsFormat;
}
}
Loading

0 comments on commit 4105c6e

Please sign in to comment.