Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv

### Added

- We added support for heuristic keywords delimiter detection: special delimiters such as ";" will be replaced with "," in the importing process. [#12974](https://github.com/JabRef/jabref/issues/12974)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action required

2. Changelog claims comma replacement 📘 Rule violation ≡ Correctness

The new CHANGELOG entry states that ";" will be replaced with "," during import, but the
implementation re-serializes keywords using the user's configured separator, which is not
necessarily a comma. This makes the release note misleading for end users.
Agent Prompt
## Issue description
The CHANGELOG entry incorrectly claims that semicolons are replaced with commas during import.

## Issue Context
The implementation detects import delimiters heuristically but writes keywords back using the user's configured keyword separator, not always a comma.

## Fix Focus Areas
- CHANGELOG.md[14-14]
- jablib/src/main/java/org/jabref/logic/importer/fileformat/BibtexParser.java[810-815]

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools

- We added a label to the Group dropdown in the Import Dialog. [#15567](https://github.com/JabRef/jabref/issues/15567)
- We added a related work text extractor, which finds and inserts the related work text into bib entries from references in the texts. [#9840](https://github.com/JabRef/jabref/issues/9840)
- We added a hover button on group rows to quickly add a new group or subgroup. [#12289](https://github.com/JabRef/jabref/issues/12289)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.BibEntryType;
import org.jabref.model.entry.BibtexString;
import org.jabref.model.entry.KeywordList;
import org.jabref.model.entry.LinkedFile;
import org.jabref.model.entry.field.Field;
import org.jabref.model.entry.field.FieldFactory;
Expand Down Expand Up @@ -97,6 +98,7 @@ public class BibtexParser implements Parser {
private static final DocumentBuilderFactory DOCUMENT_BUILDER_FACTORY = DocumentBuilderFactory.newInstance();
private static final Pattern EPILOG_PATTERN = Pattern.compile("\\w+\\s*=.*,");
private static final int INDEX_RELATIVE_PATH_IN_PLIST = 4;
private static final List<Character> IMPORT_KEYWORD_DELIMITERS = List.of(';', ','); // Default delimiters to try when importing keywords, in priority order.
private final Deque<Character> pureTextFromFile = new LinkedList<>();
private final ImportFormatPreferences importFormatPreferences;
private PushbackReader pushbackReader;
Expand All @@ -106,6 +108,7 @@ public class BibtexParser implements Parser {

private int line = 1;
private int column = 1;

// Stores the last read column of the highest column number encountered on any line so far.
// The intended data structure is Stack, but it is not used because Java code style checkers complain.
// In basic JDK data structures, there is no size-limited stack. We did not want to include Apache Commons Collections only for "CircularFifoBuffer"
Expand Down Expand Up @@ -759,9 +762,11 @@ private void parseField(BibEntry entry) throws IOException {
// it inconvenient for users if JabRef did not accept it.
if (field.getProperties().contains(FieldProperty.PERSON_NAMES)) {
entry.setField(field, entry.getField(field).orElse("") + " and " + content);
} else if (StandardField.KEYWORDS == field) {
} else if (StandardField.KEYWORDS == field) { // If there are duplicated keywords fields.
// TODO: multiple keywords fields should be combined to one
entry.addKeyword(content, importFormatPreferences.bibEntryPreferences().getKeywordSeparator());
// Parse the new content with the heuristic delimiter.
KeywordList importedKeywords = KeywordList.parseImport(content, IMPORT_KEYWORD_DELIMITERS);
entry.addKeywords(importedKeywords, importFormatPreferences.bibEntryPreferences().getKeywordSeparator());
}
} else {
// If a BibDesk File Field is encountered
Expand Down Expand Up @@ -793,7 +798,13 @@ private void parseField(BibEntry entry) throws IOException {
LOGGER.error("Could not parse BibDesk files content (field: bdsk-file...) for entry {}", entry, e);
}
} else {
entry.setField(field, content);
if (StandardField.KEYWORDS == field) { // If it is the first encountered keywords field.
// Parse the new content with the heuristic delimiter.
KeywordList importedKeywords = KeywordList.parseImport(content, IMPORT_KEYWORD_DELIMITERS);
entry.addKeywords(importedKeywords, importFormatPreferences.bibEntryPreferences().getKeywordSeparator());
} else {
entry.setField(field, content);
}
}
}
}
Expand Down
7 changes: 7 additions & 0 deletions jablib/src/main/java/org/jabref/model/entry/BibEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,13 @@ public void addKeywords(@NonNull Collection<String> keywords, Character delimite
keywords.forEach(keyword -> addKeyword(keyword, delimiter));
}

/// Add a keywordList to entry
///
/// @param keywordList A list of keywords to add
public void addKeywords(@NonNull KeywordList keywordList, Character delimiter) {
keywordList.forEach(keyword -> addKeyword(keyword, delimiter));
}

public KeywordList getKeywords(Character delimiter) {
return getFieldAsKeywords(StandardField.KEYWORDS, delimiter);
}
Expand Down
17 changes: 17 additions & 0 deletions jablib/src/main/java/org/jabref/model/entry/KeywordList.java
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,23 @@ public static KeywordList parse(@NonNull String keywordString, @NonNull Characte
return keywordList;
}

/// Parses the keyword list using heuristic delimiter detection for the importing process.
/// Tries each delimiter in the provided list in priority order; if none found, falls back to comma.
///
/// @param keywordString a String of keywordChains
/// @param delimiters a List of delimiters used for separating the keywords in the importing process
/// @return a parsed list containing the keywordChains
public static KeywordList parseImport(@NonNull String keywordString, @NonNull List<Character> delimiters) {
for (Character delimiter : delimiters) {
KeywordList keywordList = parse(keywordString, delimiter);
if (keywordList.size() > 1) {
return keywordList;
}
}
// Falls back to comma if none of the delimiters are found.
return parse(keywordString, ',');
}
Comment on lines +91 to +106
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action required

3. Missing openfasttrace requirement entry 📘 Rule violation ⚙ Maintainability

This PR introduces a user-visible change to BibTeX keyword import behavior (heuristic delimiter
detection and semicolon splitting) but does not add a corresponding OpenFastTrace req~...~1
requirement in docs/requirements/. This breaks requirements tracing for a significant behavior
change.
Agent Prompt
## Issue description
A significant import behavior change was implemented without adding a corresponding OpenFastTrace requirement entry under `docs/requirements/`.

## Issue Context
The PR adds heuristic delimiter detection for keyword import (`parseImport`/`detectImportDelimiter`) and new tests for semicolon-separated keywords.

## Fix Focus Areas
- jablib/src/main/java/org/jabref/model/entry/KeywordList.java[91-115]
- jablib/src/test/java/org/jabref/logic/importer/fileformat/BibtexParserTest.java[2181-2193]

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools


public static String serialize(List<Keyword> keywords, Character delimiter) {
String delimiterStr = delimiter.toString();
String escapeSequenceStr = Keyword.DEFAULT_ESCAPE_SYMBOL.toString();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2151,7 +2151,7 @@ void parseDuplicateKeywordsWithOnlyOneEntry() throws ParseException {
""");

BibEntry expectedEntry = new BibEntry(StandardEntryType.Article)
.withField(StandardField.KEYWORDS, "asdf,asdf,asdf");
.withField(StandardField.KEYWORDS, "asdf");

assertEquals(Optional.of(expectedEntry), result);
}
Expand All @@ -2163,7 +2163,7 @@ void parseDuplicateKeywordsWithTwoEntries() throws IOException {
.withCitationKey("Test2017");

BibEntry expectedEntrySecond = new BibEntry(StandardEntryType.Article)
.withField(StandardField.KEYWORDS, "asdf,asdf,asdf");
.withField(StandardField.KEYWORDS, "asdf");

String entries = """
@Article{Test2017,
Expand All @@ -2178,6 +2178,20 @@ void parseDuplicateKeywordsWithTwoEntries() throws IOException {
assertEquals(List.of(expectedEntryFirst, expectedEntrySecond), result.getDatabase().getEntries());
}

@Test
void parseSemicolonSeparatedKeywords() throws ParseException {
Optional<BibEntry> result = parser.parseSingleEntry("""
@Article{,
Keywords={keywordOne; keywordTwo; keywordThree},
}
""");

BibEntry expectedEntry = new BibEntry(StandardEntryType.Article)
.withField(StandardField.KEYWORDS, "keywordOne, keywordTwo, keywordThree");

assertEquals(Optional.of(expectedEntry), result);
}

@Test
void parseBibDeskLinkedFiles() throws IOException {

Expand Down
31 changes: 31 additions & 0 deletions jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,44 @@ void asStringAddsSpaceAfterDelimiter() {
assertEquals("keywordOne, keywordTwo", keywords.getAsString(','));
}

@Test
void parseImportSingleKeyword() {
assertEquals(new KeywordList("keywordOne"),
KeywordList.parseImport("keywordOne", List.of(';', ',')));
}

@Test
void parseImportWithSemicolonDelimiter() {
assertEquals(new KeywordList("keywordOne", "keywordTwo"),
KeywordList.parseImport("keywordOne; keywordTwo", List.of(';', ',')));
}

@Test
void parseImportWithCommaDelimiter() {
assertEquals(new KeywordList("keywordOne", "keywordTwo"),
KeywordList.parseImport("keywordOne, keywordTwo", List.of(';', ',')));
}

@Test
void parseImportPrefersSemicolonOverComma() {
assertEquals(new KeywordList("keywordOne, keywordTwo", "keywordThree"),
KeywordList.parseImport("keywordOne, keywordTwo; keywordThree", List.of(';', ',')));
}

@Test
void parseHierarchicalChain() {
Keyword expected = Keyword.of(List.of("Parent", "Node", "Child"));

assertEquals(new KeywordList(expected), KeywordList.parse("Parent > Node > Child", ','));
}

@Test
void parseImportHierarchicalChain() {
Keyword expected = Keyword.of(List.of("Parent", "Node", "Child"));

assertEquals(new KeywordList(expected), KeywordList.parseImport("Parent > Node > Child", List.of(';', ',')));
}

@Test
void parseTwoHierarchicalChains() {
Keyword expectedOne = Keyword.of(List.of("Parent1", "Node1", "Child1"));
Expand Down
Loading