/* BEGIN LICENSE
 * Copyright © Blue Mind SAS, 2012-2020
 *
 * This file is part of BlueMind. BlueMind is a messaging and collaborative
 * solution.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of either the GNU Affero General Public License as
 * published by the Free Software Foundation (version 3 of the License).
 *
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See LICENSE.txt
 * END LICENSE
 */
package net.bluemind.directory.hollow.datamodel.producer;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.regex.Pattern;

import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
import com.google.common.base.Strings;

import net.bluemind.directory.hollow.datamodel.AddressBookRecord;
import net.bluemind.directory.hollow.datamodel.AnrToken;
import net.bluemind.directory.hollow.datamodel.Email;

/**
 * Produces the {@link AnrToken}s of an {@link AddressBookRecord} from its
 * display name and its email addresses.
 *
 * <p>
 * For each non-empty source string the full lower-cased value is added as one
 * token, then the string is split into chunks and every chunk is handed to the
 * inherited {@code compute(String)} to obtain additional tokens.
 */
public class AnrTokens extends EdgeNgram<AnrToken> {

	/** Splits an email address on {@code '.'}, {@code '-'} and {@code '@'}, dropping empty chunks. */
	private static final Splitter EMAIL_CHUNKS = Splitter.on(CharMatcher.anyOf(".-@")).omitEmptyStrings();

	/** Splits a display name on whitespace, dropping empty chunks. */
	private static final Splitter DN_CHUNKS = Splitter.on(CharMatcher.whitespace()).omitEmptyStrings();

	/**
	 * Trailing {@code .xx} suffix (two or more ASCII letters) of an address.
	 * Compiled once instead of on every call, and case-insensitive so that an
	 * upper-cased suffix such as {@code .COM} is stripped as well.
	 */
	private static final Pattern TRAILING_TLD = Pattern.compile("\\.[a-z]{2,}$", Pattern.CASE_INSENSITIVE);

	/**
	 * Creates a tokenizer configured with {@code (2, 5)}.
	 * NOTE(review): presumably the min/max n-gram lengths of {@code EdgeNgram} —
	 * confirm against the base class.
	 */
	public AnrTokens() {
		super(2, 5);
	}

	/**
	 * Computes the tokens of a record.
	 *
	 * @param rec the record to tokenize; its {@code name}, {@code email} and
	 *            {@code emails} fields are read, and each may be null or empty
	 * @return an unmodifiable list holding the tokens of the name, then those of
	 *         the primary email, then those of the other emails
	 */
	public List<AnrToken> compute(AddressBookRecord rec) {
		List<AnrToken> tokens = new ArrayList<>();

		if (!Strings.isNullOrEmpty(rec.name)) {
			// Locale.ROOT keeps the lower-casing independent of the JVM default locale
			tokens.add(map(rec.name.toLowerCase(Locale.ROOT)));
			for (String chunk : DN_CHUNKS.split(rec.name)) {
				// NOTE(review): reuses this instance instead of allocating a new
				// AnrTokens per chunk; assumes the inherited compute(String) keeps
				// no per-call state — confirm against EdgeNgram.
				tokens.addAll(compute(chunk));
			}
		}

		if (!Strings.isNullOrEmpty(rec.email)) {
			chunkMail(tokens, rec.email);
		}

		if (rec.emails != null) {
			for (Email other : rec.emails) {
				if (other == null || Strings.isNullOrEmpty(other.address)) {
					// nothing to tokenize, same rule as for the primary email
					continue;
				}
				if (Objects.equals(rec.email, other.address)) {
					// the primary email was already tokenized above, do not add it twice
					continue;
				}
				chunkMail(tokens, other.address);
			}
		}

		return tokens.stream().toList();
	}

	/**
	 * Adds the tokens of one email address to {@code tokens}: the full
	 * lower-cased address, then the tokens of each chunk of the address once its
	 * trailing {@code .xx} suffix has been removed.
	 *
	 * @param tokens the list to append to
	 * @param email  a non-null, non-empty email address
	 */
	private void chunkMail(List<AnrToken> tokens, String email) {
		tokens.add(map(email.toLowerCase(Locale.ROOT)));
		String withoutTld = TRAILING_TLD.matcher(email).replaceAll("");
		for (String chunk : EMAIL_CHUNKS.split(withoutTld)) {
			tokens.addAll(compute(chunk));
		}
	}

	/**
	 * Wraps a string into an {@link AnrToken}.
	 *
	 * @param value the token text
	 * @return a new token whose {@code token} field is {@code value}
	 */
	@Override
	public AnrToken map(String value) {
		AnrToken ant = new AnrToken();
		ant.token = value;
		return ant;
	}

}
