/*
 * Decompiled with CFR 0.152.
 */
package ghidra.feature.vt.api.correlator.program;

import generic.DominantPair;
import generic.hash.FNV1a64MessageDigest;
import generic.lsh.KandL;
import generic.lsh.LSHMemoryModel;
import generic.lsh.vector.LSHCosineVectorAccum;
import generic.lsh.vector.LSHVector;
import generic.lsh.vector.VectorCompare;
import ghidra.feature.vt.api.correlator.program.LSHMultiHash;
import ghidra.feature.vt.api.correlator.program.SimilarDataProgramCorrelatorFactory;
import ghidra.feature.vt.api.main.VTAssociationType;
import ghidra.feature.vt.api.main.VTMatchInfo;
import ghidra.feature.vt.api.main.VTMatchSet;
import ghidra.feature.vt.api.main.VTScore;
import ghidra.feature.vt.api.util.VTAbstractProgramCorrelator;
import ghidra.framework.options.ToolOptions;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSetView;
import ghidra.program.model.listing.Data;
import ghidra.program.model.listing.DataIterator;
import ghidra.program.model.listing.Listing;
import ghidra.program.model.listing.Program;
import ghidra.program.model.mem.MemoryAccessException;
import ghidra.util.exception.CancelledException;
import ghidra.util.task.TaskMonitor;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Program correlator that proposes matches between defined data items of a
 * source and a destination program.
 *
 * <p>Each sufficiently long data item is turned into a feature vector: every
 * window of {@code NGRAM_SIZE} consecutive bytes is hashed with FNV-1a (64 bit),
 * the hash is mapped to a small integer feature id, and that id is added to the
 * item's {@link LSHCosineVectorAccum}. Source vectors are indexed in an
 * {@link LSHMultiHash}; each destination vector is then looked up in that index
 * and every neighbor whose similarity reaches {@link #SIMILARITY_THRESHOLD} is
 * reported as a {@link VTAssociationType#DATA} match.
 */
public class SimilarDataProgramCorrelator
extends VTAbstractProgramCorrelator {
    /** Minimum vector similarity a source/destination pair needs to be reported. */
    public static final double SIMILARITY_THRESHOLD = 0.5;

    /** Number of consecutive bytes hashed together into one feature. */
    private static final int NGRAM_SIZE = 4;

    /** Feature vectors of the source program's data, keyed by data start address. */
    HashMap<Address, LSHCosineVectorAccum> sourceMap;
    /** Feature vectors of the destination program's data, keyed by data start address. */
    HashMap<Address, LSHCosineVectorAccum> destinationMap;
    /** Maps an n-gram hash to the feature id assigned to it. */
    HashMap<Long, Integer> idMap;
    /**
     * Last feature id handed out; ids therefore start at 1. The counter is not
     * reset when {@link #idMap} is rebuilt, which keeps ids unique across runs.
     */
    int featureID = 0;
    /** Data items shorter than this many bytes are ignored; read from the options. */
    int minDataLength;

    /**
     * Creates the correlator; all arguments are forwarded unchanged to the
     * {@link VTAbstractProgramCorrelator} constructor.
     *
     * @param sourceProgram program matches are made from
     * @param sourceAddressSet addresses of the source program to consider
     * @param destinationProgram program matches are made to
     * @param destinationAddressSet addresses of the destination program to consider
     * @param options correlator options (minimum data length, homogenous-data
     *        skipping, memory model)
     */
    public SimilarDataProgramCorrelator(Program sourceProgram, AddressSetView sourceAddressSet, Program destinationProgram, AddressSetView destinationAddressSet, ToolOptions options) {
        super(sourceProgram, sourceAddressSet, destinationProgram, destinationAddressSet, options);
    }

    /**
     * Builds the feature vectors for both programs, indexes the source vectors
     * and adds every sufficiently similar source/destination pair to
     * {@code matchSet}.
     *
     * @param matchSet receives the matches that are found
     * @param monitor used for status messages, progress and cancellation
     * @throws CancelledException declared by the overridden method; the code in
     *         this class only throws it if a callee does
     */
    @Override
    protected void doCorrelate(VTMatchSet matchSet, TaskMonitor monitor) throws CancelledException {
        this.minDataLength = this.getOptions().getInt("Minimum data length", 8);
        boolean skipHomogenousData = this.getOptions().getBoolean("Skip Homogenous Data", true);
        monitor.setMessage("Generating source dictionary");
        LSHMultiHash<Address> sourceDictionary = this.generateDictionary(this.getSourceProgram(), matchSet, skipHomogenousData, monitor);
        monitor.setMessage("Finding destination data");
        this.findDestinations(matchSet, sourceDictionary, SIMILARITY_THRESHOLD, monitor);
    }

    /**
     * Extracts the feature vectors of both programs (as a side effect on
     * {@link #sourceMap} and {@link #destinationMap}) and returns an index of
     * the source vectors.
     *
     * @param sourceProgram unused; the source program is obtained from the superclass
     * @param matchSet passed through to feature extraction
     * @param skipHomogenousData whether data made of one repeated byte is ignored
     * @param monitor used for cancellation
     * @return the index containing every source vector
     * @throws CancelledException if a callee reports cancellation
     */
    private LSHMultiHash<Address> generateDictionary(Program sourceProgram, VTMatchSet matchSet, boolean skipHomogenousData, TaskMonitor monitor) throws CancelledException {
        LSHMultiHash<Address> dictionary = this.generateLSHMultiHash();
        this.extractNGramFeatures(matchSet, skipHomogenousData, monitor, NGRAM_SIZE);
        dictionary.add(this.sourceMap, monitor);
        return dictionary;
    }

    /**
     * Resets the vector and feature-id maps and fills them from the defined
     * data of the source and destination address sets.
     *
     * @param matchSet unused
     * @param skipHomogenousData whether data made of one repeated byte is ignored
     * @param monitor used for cancellation
     * @param n n-gram width in bytes
     * @throws CancelledException if a callee reports cancellation
     */
    private void extractNGramFeatures(VTMatchSet matchSet, boolean skipHomogenousData, TaskMonitor monitor, int n) throws CancelledException {
        this.sourceMap = new HashMap<>();
        this.destinationMap = new HashMap<>();
        this.idMap = new HashMap<>();
        Program sourceProgram = this.getSourceProgram();
        Program destinationProgram = this.getDestinationProgram();
        DataIterator sourceDataIterator = sourceProgram.getListing().getDefinedData(this.getSourceAddressSet(), true);
        DataIterator destinationDataIterator = destinationProgram.getListing().getDefinedData(this.getDestinationAddressSet(), true);
        this.addDataToMap(sourceDataIterator, true, skipHomogenousData, n, monitor);
        this.addDataToMap(destinationDataIterator, false, skipHomogenousData, n, monitor);
    }

    /**
     * Adds one feature vector per qualifying data item to the source or
     * destination map.
     *
     * <p>An item is skipped when it is shorter than {@link #minDataLength}, its
     * start address is outside the relevant address set, its bytes cannot be
     * read, or (if requested) all of its bytes are identical. A vector is only
     * created once the first n-gram of an item has been read successfully, so
     * items shorter than {@code n} bytes produce no vector.
     *
     * @param dataIt data items to process
     * @param isSourceProgram {@code true} to fill {@link #sourceMap},
     *        {@code false} to fill {@link #destinationMap}
     * @param skipHomogenousData whether data made of one repeated byte is ignored
     * @param n n-gram width in bytes
     * @param monitor used for cancellation; processing stops quietly when cancelled
     * @throws CancelledException if a callee reports cancellation
     * @throws RuntimeException if an n-gram cannot be read completely
     */
    private void addDataToMap(DataIterator dataIt, boolean isSourceProgram, boolean skipHomogenousData, int n, TaskMonitor monitor) throws CancelledException {
        double weight = 1.0 / (double)n;
        AddressSetView addressSet = isSourceProgram ? this.getSourceAddressSet() : this.getDestinationAddressSet();
        // The target map does not change during this call, so select it once.
        Map<Address, LSHCosineVectorAccum> targetMap = isSourceProgram ? this.sourceMap : this.destinationMap;
        FNV1a64MessageDigest digest = new FNV1a64MessageDigest();
        byte[] window = new byte[n];
        while (dataIt.hasNext() && !monitor.isCancelled()) {
            Data data = dataIt.next();
            int length = data.getLength();
            if (length < this.minDataLength) {
                continue;
            }
            Address address = data.getAddress();
            if (!addressSet.contains(address)) {
                continue;
            }
            byte[] allBytes;
            try {
                allBytes = data.getBytes();
            }
            catch (MemoryAccessException ignored) {
                // Unreadable data cannot contribute features; skip it.
                continue;
            }
            // Test the cheap flag first so the byte scan is skipped when not needed.
            if (skipHomogenousData && SimilarDataProgramCorrelator.isRepeating(allBytes, monitor)) {
                continue;
            }
            LSHCosineVectorAccum vector = null;
            int windowCount = length - (n - 1);
            for (int i = 0; i < windowCount && !monitor.isCancelled(); ++i) {
                if (data.getBytes(window, i) != n) {
                    throw new RuntimeException("failed to read vector data at " + address);
                }
                if (vector == null) {
                    // Resolve the item's vector once, lazily, after the first successful read.
                    vector = targetMap.get(address);
                    if (vector == null) {
                        vector = new LSHCosineVectorAccum();
                        targetMap.put(address, vector);
                    }
                }
                digest.update(window, monitor);
                long hash = digest.digestLong();
                vector.addHash(this.getFeatureID(hash), weight);
            }
        }
    }

    /**
     * Tells whether every byte of the array has the same value.
     *
     * @param bytes bytes to inspect
     * @param monitor used for cancellation
     * @return {@code true} if all bytes are equal, if the array is empty, or if
     *         the monitor is cancelled during the scan; {@code false} otherwise
     */
    private static boolean isRepeating(byte[] bytes, TaskMonitor monitor) {
        if (bytes.length == 0) {
            // Nothing to compare; also avoids indexing an empty array.
            return true;
        }
        byte first = bytes[0];
        for (int ii = 1; ii < bytes.length; ++ii) {
            if (monitor.isCancelled()) {
                return true;
            }
            if (bytes[ii] != first) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the feature id for an n-gram hash, assigning the next free id the
     * first time a hash is seen.
     *
     * @param hash n-gram hash
     * @return the id associated with {@code hash}
     */
    private int getFeatureID(long hash) {
        Integer known = this.idMap.get(hash);
        if (known != null) {
            return known;
        }
        ++this.featureID;
        this.idMap.put(hash, this.featureID);
        return this.featureID;
    }

    /**
     * Looks up every destination vector in the source index and adds the
     * resulting matches to {@code matchSet}. Returns early, keeping the matches
     * added so far, when the monitor is cancelled.
     *
     * @param matchSet receives the matches
     * @param sourceDictionary index of the source vectors
     * @param threshold minimum similarity for a match
     * @param monitor used for progress and cancellation
     */
    private void findDestinations(VTMatchSet matchSet, LSHMultiHash<Address> sourceDictionary, double threshold, TaskMonitor monitor) {
        monitor.initialize((long)this.destinationMap.size());
        for (Map.Entry<Address, LSHCosineVectorAccum> entry : this.destinationMap.entrySet()) {
            if (monitor.isCancelled()) {
                return;
            }
            monitor.incrementProgress(1L);
            Address destinationAddress = entry.getKey();
            LSHCosineVectorAccum vector = entry.getValue();
            Set<DominantPair<Address, LSHCosineVectorAccum>> neighbors = sourceDictionary.lookup(vector);
            List<VTMatchInfo> members = this.transform(matchSet, destinationAddress, vector, neighbors, threshold, monitor);
            // transform() never puts null into its result, so no null check is needed.
            for (VTMatchInfo member : members) {
                if (monitor.isCancelled()) {
                    return;
                }
                matchSet.addMatch(member);
            }
        }
    }

    /**
     * Converts the candidate neighbors of one destination data item into match
     * records, keeping only candidates whose similarity is a number that is at
     * least {@code threshold}.
     *
     * @param matchSet match set the created records belong to
     * @param destinationAddress start address of the destination data item
     * @param destinationVector feature vector of the destination data item
     * @param neighbors candidate source items (address and vector)
     * @param threshold minimum similarity for a match
     * @param monitor used for cancellation; candidates processed before
     *        cancellation are still returned
     * @return the matches, possibly empty, never containing {@code null}
     */
    private List<VTMatchInfo> transform(VTMatchSet matchSet, Address destinationAddress, LSHCosineVectorAccum destinationVector, Set<DominantPair<Address, LSHCosineVectorAccum>> neighbors, double threshold, TaskMonitor monitor) {
        List<VTMatchInfo> result = new ArrayList<>();
        Listing sourceListing = this.getSourceProgram().getListing();
        Listing destinationListing = this.getDestinationProgram().getListing();
        VectorCompare veccompare = new VectorCompare();
        for (DominantPair<Address, LSHCosineVectorAccum> neighbor : neighbors) {
            if (monitor.isCancelled()) {
                break;
            }
            Address sourceAddress = neighbor.first;
            LSHCosineVectorAccum sourceVector = neighbor.second;
            double similarity = sourceVector.compare(destinationVector, veccompare);
            if (similarity < threshold || Double.isNaN(similarity)) {
                continue;
            }
            // Confidence: similarity weighted by both vector lengths, scaled by 10.
            double confidence = similarity * sourceVector.getLength() * destinationVector.getLength() * 10.0;
            int sourceLength = SimilarDataProgramCorrelator.getDataLength(sourceListing, sourceAddress);
            int destinationLength = SimilarDataProgramCorrelator.getDataLength(destinationListing, destinationAddress);
            VTMatchInfo match = new VTMatchInfo(matchSet);
            match.setSimilarityScore(new VTScore(similarity));
            match.setConfidenceScore(new VTScore(confidence));
            match.setSourceLength(sourceLength);
            match.setDestinationLength(destinationLength);
            match.setSourceAddress(sourceAddress);
            match.setDestinationAddress(destinationAddress);
            match.setTag(null);
            match.setAssociationType(VTAssociationType.DATA);
            result.add(match);
        }
        return result;
    }

    /**
     * Returns the length of the data item that starts at {@code address}.
     *
     * @param listing listing to query
     * @param address start address of the data item
     * @return the data item's length as reported by {@link Data#getLength()}
     * @throws NullPointerException if the listing returns no data for the
     *         address; callers in this class only pass addresses previously
     *         obtained from the defined-data iterators
     */
    private static int getDataLength(Listing listing, Address address) {
        Data data = listing.getDataAt(address);
        return data.getLength();
    }

    /**
     * Creates an empty index sized according to the "Memory model" option.
     *
     * @return a new, empty {@link LSHMultiHash}
     */
    private LSHMultiHash<Address> generateLSHMultiHash() {
        // NOTE(review): raw casts kept from the decompiled form; the declared type of
        // MEMORY_MODEL_DEFAULT is not visible here — presumably LSHMemoryModel, confirm
        // and drop the casts.
        LSHMemoryModel model = (LSHMemoryModel)this.getOptions().getEnum("Memory model", (Enum)SimilarDataProgramCorrelatorFactory.MEMORY_MODEL_DEFAULT);
        int L = KandL.memoryModelToL(model);
        return new LSHMultiHash<Address>(model.getK(), L);
    }

    /**
     * Returns the correlator's display name.
     *
     * @return the constant name {@code "Similar Data Match"}
     */
    @Override
    public String getName() {
        return "Similar Data Match";
    }
}

