我编写了一个Java程序,用于解析基因表达数据的.soft文件,并将结果写入txt文件:

package il.ac.tau.cs.sw1.bioinformatics;
import java.io.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.math3.stat.inference.TestUtils;
/**
 * Gene Expression Analyzer.
 *
 * <p>Loads a gene expression dataset from a SOFT file, writes it to a tabular text file, and
 * writes the genes whose expression differs between two labelled sample groups (two-sample
 * t-test) to a second text file.
 *
 * <p>Command line arguments:
 * <ul>
 *   <li>args[0] - GeoDatasetName: gene expression dataset name (a corresponding input file
 *       {@code <GeoDatasetName>.soft} is expected in the working directory)</li>
 *   <li>args[1] - Label1: label of the first sample subset</li>
 *   <li>args[2] - Label2: label of the second sample subset</li>
 *   <li>args[3] - Alpha: t-test threshold; only genes with a p-value below this threshold are
 *       written to the output file</li>
 * </ul>
 *
 * <p>Execution example:
 * {@code GeneExpressionAnalyzer GDS4085 "estrogen receptor-negative" "estrogen receptor-positive" 0.01}
 *
 * @author software1-2014
 */
public class GeneExpressionAnalyzer {

    /**
     * Program entry point; see the class documentation for the expected arguments.
     *
     * @param args command line arguments (dataset name, label 1, label 2, alpha)
     * @throws IOException if the input file cannot be read or an output file cannot be written
     * @throws IllegalArgumentException if fewer than four arguments are supplied
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 4) {
            throw new IllegalArgumentException(
                    "Usage: GeneExpressionAnalyzer <GeoDatasetName> <Label1> <Label2> <Alpha>");
        }

        // Reads the dataset from a SOFT input file
        String inputSoftFileName = args[0] + ".soft";
        GeneExpressionDataset geneExpressionDataset = parseGeneExpressionFile(inputSoftFileName);
        System.out.printf("Gene expression dataset loaded from file %s. %n", inputSoftFileName);
        System.out.printf("Dataset contains %d samples and %d gene probes.%n%n",
                geneExpressionDataset.samplesNumber, geneExpressionDataset.genesNumber);

        // Writes the dataset to a tabular format
        String tabularFileName = args[0] + "-Tabular.txt";
        writeDatasetToTabularFile(geneExpressionDataset, tabularFileName);
        System.out.printf("Dataset saved to tabular file - %s.%n%n", tabularFileName);

        // Identifies differentially expressed genes between two sample groups and writes the
        // results to a text file
        String label1 = args[1];
        String label2 = args[2];
        double alpha = Double.parseDouble(args[3]);
        String diffGenesFileName = args[0] + "-DiffGenes.txt";
        int numOfDiffGenes = writeTopDifferentiallyExpressedGenesToFile(
                diffGenesFileName, geneExpressionDataset, alpha, label1, label2);
        System.out.printf("%d differentially expressed genes identified using alpha of %f when comparing the two sample groups [%s] and [%s].%n",
                numOfDiffGenes, alpha, label1, label2);
        System.out.printf("Results saved to file %s.%n", diffGenesFileName);
    }

    /** Parses every element of {@code values} with {@link Float#parseFloat(String)}. */
    private static float[] stringToFloat(String[] values) {
        float[] parsed = new float[values.length];
        for (int i = 0; i < values.length; i++) {
            parsed[i] = Float.parseFloat(values[i]);
        }
        return parsed;
    }

    /** Formats each value with two decimals and renders the result as {@code [a, b, ...]}. */
    private static String changeFormat(float[] array) {
        String[] formatted = new String[array.length];
        for (int i = 0; i < array.length; i++) {
            formatted[i] = String.format("%.2f", array[i]);
        }
        return Arrays.toString(formatted);
    }

    /**
     * Returns the trimmed text that follows the first {@code '='} of a {@code key = value} line.
     *
     * @throws IOException if the line contains no {@code '='}
     */
    private static String headerValue(String line) throws IOException {
        int eq = line.indexOf('=');
        if (eq < 0) {
            throw new IOException("Malformed SOFT line (missing '='): " + line);
        }
        return line.substring(eq + 1).trim();
    }

    /**
     * Parses the given SOFT file.
     *
     * <p>Recognised lines: {@code ^SUBSET} (starts a new subset), {@code !subset_description}
     * (the subset's label), {@code !subset_sample_id} (comma separated sample ids of the subset),
     * the tab separated {@code ID_REF} table header (columns from the third onward are sample
     * ids) and the table rows up to {@code !dataset_table_end} (gene id, gene symbol, then one
     * value per sample).
     *
     * <p>{@code samplesNumber} and {@code genesNumber} are taken from the parsed table so they
     * always agree with the sizes of the returned arrays. A sample's label is the description of
     * the first subset (in file order) that lists it; samples listed in no subset get a
     * {@code null} label.
     *
     * @param filename a gene expression file in SOFT format
     * @return a GeneExpressionDataset object storing all data parsed from the input file
     * @throws IOException if the file cannot be read, contains no {@code ID_REF} table header, or
     *     contains a malformed header/table line
     * @throws NumberFormatException if a table value is not a valid float
     */
    public static GeneExpressionDataset parseGeneExpressionFile(String filename) throws IOException {
        List<String> geneIds = new ArrayList<String>();
        List<String> geneSymbols = new ArrayList<String>();
        List<float[]> rows = new ArrayList<float[]>();
        // Parallel lists: entry k describes the k-th ^SUBSET section of the file.
        List<String> subsetDescriptions = new ArrayList<String>();
        List<String[]> subsetSampleIds = new ArrayList<String[]>();
        String[] sampleIds = null;
        boolean inTable = false;

        // try-with-resources closes the reader even when parsing fails half way through.
        try (BufferedReader reader = new BufferedReader(new FileReader(filename))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                if (line.startsWith("^SUBSET")) {
                    subsetDescriptions.add(null);
                    subsetSampleIds.add(new String[0]);
                } else if (line.startsWith("!subset_description")) {
                    if (!subsetDescriptions.isEmpty()) {
                        subsetDescriptions.set(subsetDescriptions.size() - 1, headerValue(line));
                    }
                } else if (line.startsWith("!subset_sample_id")) {
                    if (!subsetSampleIds.isEmpty()) {
                        subsetSampleIds.set(subsetSampleIds.size() - 1,
                                headerValue(line).split("\\s*,\\s*"));
                    }
                } else if (line.startsWith("!dataset_table_end")) {
                    inTable = false;
                } else if (line.startsWith("ID_REF")) {
                    String[] header = line.split("\t");
                    if (header.length < 2) {
                        throw new IOException("Malformed table header: " + line);
                    }
                    sampleIds = Arrays.copyOfRange(header, 2, header.length);
                    inTable = true;
                } else if (inTable && !line.isEmpty()) {
                    String[] fields = line.split("\t");
                    if (fields.length < 2) {
                        throw new IOException("Malformed table row: " + line);
                    }
                    geneIds.add(fields[0]);
                    geneSymbols.add(fields[1]);
                    rows.add(stringToFloat(Arrays.copyOfRange(fields, 2, fields.length)));
                }
            }
        }

        if (sampleIds == null) {
            throw new IOException("No data table (ID_REF header) found in " + filename);
        }

        // First subset listing a sample wins, so every sample ends up with at most one label.
        Map<String, String> labelBySample = new HashMap<String, String>();
        for (int s = 0; s < subsetDescriptions.size(); s++) {
            String description = subsetDescriptions.get(s);
            if (description == null) {
                continue;
            }
            for (String sampleId : subsetSampleIds.get(s)) {
                if (!labelBySample.containsKey(sampleId)) {
                    labelBySample.put(sampleId, description);
                }
            }
        }
        String[] labels = new String[sampleIds.length];
        for (int i = 0; i < sampleIds.length; i++) {
            labels[i] = labelBySample.get(sampleIds[i]);
        }

        GeneExpressionDataset dataset = new GeneExpressionDataset();
        dataset.sampleIds = sampleIds;
        dataset.samplesNumber = sampleIds.length;
        dataset.genesNumber = geneIds.size();
        dataset.geneIds = geneIds.toArray(new String[geneIds.size()]);
        dataset.geneSymbols = geneSymbols.toArray(new String[geneSymbols.size()]);
        dataset.dataMatrix = rows.toArray(new float[rows.size()][]);
        dataset.labels = labels;
        return dataset;
    }

    /**
     * Writes the dataset to a tabular text file.
     *
     * <p>The first two lines hold the sample labels and the sample ids; each following line
     * holds a gene id, its gene symbol and the gene's expression values formatted with two
     * decimals.
     *
     * @param geneExpressionDataset the dataset to write
     * @param outputFilename path of the file to create (overwritten if it exists)
     * @throws IOException if the file cannot be written
     */
    public static void writeDatasetToTabularFile(GeneExpressionDataset geneExpressionDataset,
            String outputFilename) throws IOException {
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputFilename))) {
            String labelsLine = "\t\t\t\t" + Arrays.toString(geneExpressionDataset.labels);
            String samplesLine = "\t\t\t\t" + Arrays.toString(geneExpressionDataset.sampleIds);
            writer.write(labelsLine + "\r\n" + samplesLine + "\r\n");
            for (int i = 0; i < geneExpressionDataset.genesNumber; i++) {
                writer.write(geneExpressionDataset.geneIds[i] + "\t"
                        + geneExpressionDataset.geneSymbols[i] + "\t"
                        + changeFormat(geneExpressionDataset.dataMatrix[i]) + "\r\n");
            }
        }
    }

    /**
     * Runs a t-test per gene between the samples labelled {@code label1} and those labelled
     * {@code label2}, and writes every gene whose p-value is below {@code alpha} to the output
     * file, ordered by ascending p-value (ties keep dataset order).
     *
     * <p>Each output line is {@code rank <TAB> p-value <TAB> gene id <TAB> gene symbol}, where the
     * rank starts at 0 and the p-value is printed with six decimals.
     *
     * @param outputFilename path of the file to create (overwritten if it exists)
     * @param geneExpressionDataset the dataset to analyse
     * @param alpha p-value threshold
     * @param label1 label of the first sample group
     * @param label2 label of the second sample group
     * @return numOfDiffGenes The number of differentially expressed genes detected, having p-value
     *     lower than alpha
     * @throws IOException if the file cannot be written
     */
    public static int writeTopDifferentiallyExpressedGenesToFile(String outputFilename,
            GeneExpressionDataset geneExpressionDataset, double alpha,
            String label1, String label2) throws IOException {
        final double[] pValues = new double[geneExpressionDataset.genesNumber];
        // Only indices of significant genes are collected, so genes that fail the threshold can
        // never leak into the output (a NaN p-value also fails the comparison).
        List<Integer> significant = new ArrayList<Integer>();
        for (int i = 0; i < pValues.length; i++) {
            pValues[i] = calcTtest(geneExpressionDataset, i, label1, label2);
            if (pValues[i] < alpha) {
                significant.add(i);
            }
        }
        // Collections.sort is stable: genes with equal p-values stay in dataset order.
        Collections.sort(significant, new Comparator<Integer>() {
            @Override
            public int compare(Integer a, Integer b) {
                return Double.compare(pValues[a], pValues[b]);
            }
        });

        try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputFilename))) {
            int rank = 0;
            for (int geneIndex : significant) {
                String pValueText = String.format("%.6f", pValues[geneIndex]);
                writer.write(String.valueOf(rank) + "\t" + pValueText + "\t"
                        + geneExpressionDataset.geneIds[geneIndex] + "\t"
                        + geneExpressionDataset.geneSymbols[geneIndex] + "\r\n");
                rank++;
            }
        }
        return significant.size();
    }

    /**
     * Returns the entries in the 'data' array for which the corresponding entries in the 'labels'
     * array equals 'label'.
     *
     * <p>The result contains exactly the matching entries, in their original order. Positions
     * whose label is {@code null}, and positions beyond the shorter of the two arrays, never
     * match.
     *
     * @param data values, one per sample
     * @param labels labels, one per sample (entries may be {@code null})
     * @param label the label to select
     * @return a new array holding the selected values widened to {@code double}
     */
    public static double[] getDataEntriesForLabel(float[] data, String[] labels, String label) {
        double[] selected = new double[data.length];
        int count = 0;
        int limit = Math.min(data.length, labels.length);
        for (int i = 0; i < limit; i++) {
            if (labels[i] != null && labels[i].equals(label)) {
                selected[count] = data[i];
                count++;
            }
        }
        // Trim to the number of matches; no padding element is appended.
        return Arrays.copyOf(selected, count);
    }

    /**
     * calcTtest - returns a pValue for the t-Test.
     *
     * <p>Returns the p-value, associated with a two-sample, two-tailed t-test comparing the means
     * of the values of gene {@code geneIndex} in the two labelled sample groups.
     *
     * <p>See
     * http://commons.apache.org/proper/commons-math/apidocs/org/apache/commons/math3/stat/inference/TTest.html#tTest(double[], double[])
     * for the preconditions the library places on the two samples.
     *
     * @param geneExpressionDataset the dataset holding the expression matrix and sample labels
     * @param geneIndex row index of the gene in the expression matrix
     * @param label1 label of the first sample group
     * @param label2 label of the second sample group
     * @return the p-value computed by {@code TestUtils.tTest}
     */
    private static double calcTtest(GeneExpressionDataset geneExpressionDataset, int geneIndex,
            String label1, String label2) {
        float[] geneValues = geneExpressionDataset.dataMatrix[geneIndex];
        double[] sample1 = getDataEntriesForLabel(geneValues, geneExpressionDataset.labels, label1);
        double[] sample2 = getDataEntriesForLabel(geneValues, geneExpressionDataset.labels, label2);
        return TestUtils.tTest(sample1, sample2);
    }

    /**
     * GeneExpressionDataset - a class representing a gene expression dataset.
     *
     * @author software1-2014
     */
    public static class GeneExpressionDataset {

        public int samplesNumber; // number of dataset samples
        public int genesNumber; // number of dataset gene probes

        public String[] sampleIds; // sample ids
        public String[] geneIds; // gene probe ids
        public String[] geneSymbols; // gene symbols
        public float[][] dataMatrix; // expression data matrix (one row per gene probe)

        public String[] labels; // sample labels
    }
}


现在它无法编译,错误消息如下:

GeneExpressionAnalyzer.java:2: 错误: 包org.apache.commons.math3.stat.inference不存在
import org.apache.commons.math3.stat.inference.TestUtils;

GeneExpressionAnalyzer.java:277: 错误: 找不到符号
return TestUtils.tTest(sample1, sample2);
符号: 变量TestUtils
位置: 类GeneExpressionAnalyzer
2个错误

我不明白哪里出了问题,我明明已经把包含TestUtils的.jar文件添加到了路径中。
(下载地址:http://apache.spd.co.il//commons/math/binaries/commons-math3-3.2-bin.zip)

有什么建议吗?

最佳答案

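如果您正在使用Eclipse:

1. 从here(原文中的下载链接)手动下载jar文件。

2. 在Eclipse中打开Package Explorer,右键单击项目,选择
Build Path -> Configure Build Path,此时会打开一个窗口。

3. 在Libraries选项卡下,单击Add External JARs,选择您下载的jar文件,然后单击“确定”。

就这样,现在问题应该解决了。如果是在命令行中用javac编译,则需要通过-cp选项把该jar加入类路径,例如:javac -cp commons-math3-3.2.jar GeneExpressionAnalyzer.java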

关于java - 找不到符号:变量TestUtils,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/23268145/

10-10 11:05