Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions core/src/main/java/org/apache/james/core/Domain.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
package org.apache.james.core;

import java.io.Serializable;
import java.net.IDN;
import java.util.Locale;
import java.util.Objects;

Expand Down Expand Up @@ -54,9 +55,27 @@ public static Domain of(String domain) {
Preconditions.checkArgument(domain.length() <= MAXIMUM_DOMAIN_LENGTH,
"Domain name length should not exceed %s characters", MAXIMUM_DOMAIN_LENGTH);

String domainWithoutBrackets = removeBrackets(domain);
String domainWithoutBrackets;
try {
domainWithoutBrackets = IDN.toASCII(removeBrackets(domain), IDN.ALLOW_UNASSIGNED);
} catch (IllegalArgumentException e) {
// IDN.toASCII's own message can be cryptic ("Empty label is not
// a legal name", "A prohibited code point was found in the
// input..."). Let's save wear and tear on the poor developer's
// brain.
throw new IllegalArgumentException(
"Domain '" + domain + "' is invalid according to IDNA: " + e.getMessage(), e);
}
Preconditions.checkArgument(PART_CHAR_MATCHER.matchesAllOf(domainWithoutBrackets),
"Domain parts ASCII chars must be a-z A-Z 0-9 - or _ in %s", domain);
"Domain parts ASCII chars must be a-z A-Z 0-9 - or _ in %s", domain);

if (domainWithoutBrackets.startsWith("xn--") ||
domainWithoutBrackets.contains(".xn--")) {
domainWithoutBrackets = IDN.toUnicode(domainWithoutBrackets);
Preconditions.checkArgument(!domainWithoutBrackets.startsWith("xn--") &&
!domainWithoutBrackets.contains(".xn--"),
"A-label could not be decoded to Unicode in %s", domain);
}

int pos = 0;
int nextDot = domainWithoutBrackets.indexOf('.');
Expand Down
81 changes: 74 additions & 7 deletions core/src/main/java/org/apache/james/core/MailAddress.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

package org.apache.james.core;

import java.net.IDN;
import java.text.Normalizer;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
Expand Down Expand Up @@ -172,7 +174,13 @@ private int stripSourceRoute(String address, int pos) {
* @throws AddressException if the parse failed
*/
public MailAddress(String address) throws AddressException {
address = address.trim();
// RFC 6532 §3.1 recommends NFC normalisation. Canonically-equivalent
// Unicode strings (for example U+00E9 vs U+0065 U+0301 — both render
// as "é") then produce equal MailAddress objects with equal hashCode
// values, which dedup, alias resolution and routing-table lookups
// rely on. NFC is a no-op for pure ASCII, so ASCII addresses are
// unaffected.
address = Normalizer.normalize(address.trim(), Normalizer.Form.NFC);
int pos = 0;

// Test if mail address has source routing information (RFC-821) and get rid of it!!
Expand Down Expand Up @@ -418,7 +426,7 @@ public Optional<InternetAddress> toInternetAddress() {
try {
return Optional.of(new InternetAddress(toString()));
} catch (AddressException ae) {
LOGGER.warn("A valid address '{}' as per James criterial fails to parse as a jakarta.mail InternetAdrress", asString());
LOGGER.warn("A valid address '{}' as per James criteria fails to parse as a jakarta.mail InternetAdrress", asString());
return Optional.empty();
}
}
Expand Down Expand Up @@ -513,6 +521,24 @@ private int parseQuotedLocalPart(StringBuilder lpSB, String address, int pos) th
"characters exception <CR>, <LF>, quote (\"), or backslash (\\) at position " +
(pos + 1) + " in '" + address + "'");
}
// Same surrogate-pair check as in parseUnquotedLocalPart:
// unpaired or mis-ordered surrogates would produce
// ill-formed UTF-8 on output.
if (Character.isLowSurrogate(q)) {
throw new AddressException("Unpaired UTF-16 low surrogate in quoted local-part at position " +
(pos + 1) + " in '" + address + "'");
}
if (Character.isHighSurrogate(q)) {
if (pos + 1 >= address.length()
|| !Character.isLowSurrogate(address.charAt(pos + 1))) {
throw new AddressException("Unpaired UTF-16 high surrogate in quoted local-part at position " +
(pos + 1) + " in '" + address + "'");
}
lpSB.append(q);
lpSB.append(address.charAt(pos + 1));
pos += 2;
continue;
}
lpSB.append(q);
pos++;
}
Expand Down Expand Up @@ -549,18 +575,40 @@ private int parseUnquotedLocalPart(StringBuilder lpSB, String address, int pos)
//End of local-part
break;
} else {
//<c> ::= any one of the 128 ASCII characters, but not any
// <special> or <SP>
//<c> ::= any printable ASCII character, or any non-ASCII
// unicode codepoint, but not <special> or <SP>
//<special> ::= "<" | ">" | "(" | ")" | "[" | "]" | "\" | "."
// | "," | ";" | ":" | "@" | """ | the control
// characters (ASCII codes 0 through 31 inclusive and
// 127)
// characters (ASCII codes 0 through 31 inclusive,
// 127, and the C1 controls 128 through 159)
//<SP> ::= the space character (ASCII code 32)
char c = address.charAt(pos);
if (c <= 31 || c >= 127 || c == ' ') {
if (c <= 31 || c == 127 || c == ' ' || (c >= 0x80 && c <= 0x9F)) {
throw new AddressException("Invalid character in local-part (user account) at position " +
(pos + 1) + " in '" + address + "'", address, pos + 1);
}
// Java strings are UTF-16, so a supplementary-plane
// codepoint (emoji, CJK extension, etc.) appears here as a
// high-surrogate followed by a low-surrogate. We must keep
// them paired so the address can serialise as well-formed
// UTF-8 (RFC 6532 §3.1) — a lone or mis-ordered surrogate
// would produce ill-formed UTF-8 octets on output.
if (Character.isLowSurrogate(c)) {
throw new AddressException("Unpaired UTF-16 low surrogate in local-part at position " +
(pos + 1) + " in '" + address + "'", address, pos + 1);
}
if (Character.isHighSurrogate(c)) {
if (pos + 1 >= address.length()
|| !Character.isLowSurrogate(address.charAt(pos + 1))) {
throw new AddressException("Unpaired UTF-16 high surrogate in local-part at position " +
(pos + 1) + " in '" + address + "'", address, pos + 1);
}
lpSB.append(c);
lpSB.append(address.charAt(pos + 1));
pos += 2;
lastCharDot = false;
continue;
}
int i = 0;
while (i < SPECIAL.length) {
if (c == SPECIAL[i]) {
Expand Down Expand Up @@ -688,6 +736,7 @@ private int parseDomain(StringBuilder dSB, String address, int pos) throws Addre
// in practice though, we should relax this as domain names can start
// with digits as well as letters. So only check that doesn't start
// or end with hyphen.
boolean unicode = false;
while (true) {
if (pos >= address.length()) {
break;
Expand All @@ -700,13 +749,31 @@ private int parseDomain(StringBuilder dSB, String address, int pos) throws Addre
resultSB.append(ch);
pos++;
continue;
} else if (ch >= 0x0080) {
resultSB.append(ch);
pos++;
unicode = true;
continue;
}
if (ch == '.') {
break;
}
throw new AddressException("Invalid character at " + pos + " in '" + address + "'", address, pos);
}
String result = resultSB.toString();
if (unicode) {
try {
result = IDN.toASCII(result, IDN.ALLOW_UNASSIGNED);
} catch (IllegalArgumentException e) {
throw new AddressException("Domain invalid according to IDNA", address);
}
}
if (result.startsWith("xn--") || result.contains(".xn--")) {
result = IDN.toUnicode(result);
if (result.startsWith("xn--") || result.contains(".xn--")) {
throw new AddressException("Domain invalid according to IDNA", address);
}
}
if (result.startsWith("-") || result.endsWith("-")) {
throw new AddressException("Domain name cannot begin or end with a hyphen \"-\" at position " +
(pos + 1) + " in '" + address + "'", address, pos + 1);
Expand Down
91 changes: 91 additions & 0 deletions core/src/test/java/org/apache/james/core/DomainTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/****************************************************************
* Licensed to the Apache Software Foundation (ASF) under one *
* or more contributor license agreements. See the NOTICE file *
* distributed with this work for additional information *
* regarding copyright ownership. The ASF licenses this file *
* to you under the Apache License, Version 2.0 (the *
* "License"); you may not use this file except in compliance *
* with the License. You may obtain a copy of the License at *
* *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, *
* software distributed under the License is distributed on an *
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
* KIND, either express or implied. See the License for the *
* specific language governing permissions and limitations *
* under the License. *
****************************************************************/

package org.apache.james.core;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import java.util.stream.Stream;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;


/**
 * Tests for {@link Domain#of(String)}: plain ASCII domains, IPv4 literals,
 * internationalised domain names supplied as Punycode or as Unicode, and
 * rejection of malformed input.
 */
class DomainTest {
    /**
     * For an all-lowercase ASCII domain, {@code name()} and {@code asString()}
     * agree; for mixed-case input they differ, while {@code asString()} is
     * the same for both spellings.
     */
    @Test
    void testPlainDomain() {
        Domain d1 = Domain.of("example.com");
        // A bare assertThat(boolean) without a terminal assertion verifies
        // nothing, so compare the two values explicitly.
        assertThat(d1.name()).isEqualTo(d1.asString());
        Domain d2 = Domain.of("Example.com");
        assertThat(d2.name()).isNotEqualTo(d2.asString());
        assertThat(d1.asString()).isEqualTo(d2.asString());
    }

    /** A dotted-quad IPv4 literal is accepted and returned unchanged. */
    @Test
    void testIPv4Domain() {
        Domain d1 = Domain.of("192.0.4.1");
        assertThat(d1.asString()).isEqualTo("192.0.4.1");
    }

    /** A Punycode (A-label) domain is exposed in its decoded Unicode form. */
    @Test
    void testPunycodeIDN() {
        Domain d1 = Domain.of("xn--gr-zia.example");
        assertThat(d1.asString()).isEqualTo("grå.example");
    }

    /** A domain supplied directly in Unicode keeps the same name and string form. */
    @Test
    void testDevanagariDomain() {
        Domain d1 = Domain.of("डाटामेल.भारत");
        assertThat(d1.asString()).isEqualTo(d1.name());
    }

    /**
     * Supplies the inputs that {@link Domain#of(String)} is expected to
     * reject, one argument set per malformed domain.
     */
    private static Stream<Arguments> malformedDomains() {
        return Stream.of(
                "😊☺️.example",       // emoji not permitted by IDNA
                "#.example",          // really and truly not permitted
                "\uFEFF.example",     // U+FEFF is the byte order mark
                "\u200C.example",     // U+200C is a zero-width non-joiner
                "\u200Eibm.example"   // U+200E is left-to-right
            )
            .map(Arguments::of);
    }

    /** Every malformed domain must be rejected with an {@link IllegalArgumentException}. */
    @ParameterizedTest
    @MethodSource("malformedDomains")
    void testMalformedDomains(String malformed) {
        assertThatThrownBy(() -> Domain.of(malformed))
            .as("rejecting malformed domain " + malformed)
            .isInstanceOf(IllegalArgumentException.class);
    }

    /** The rejection message must quote the input that triggered it. */
    @ParameterizedTest
    @MethodSource("malformedDomains")
    void exceptionForMalformedDomainShouldNameTheOffendingInput(String malformed) {
        // Without the offending input in the message, "Domain invalid
        // according to IDNA" leaves a future debugger guessing what
        // string actually triggered it.
        assertThatThrownBy(() -> Domain.of(malformed))
            .hasMessageContaining(malformed);
    }
}


Loading