嗨,我有一个像这样的Python脚本。我正在Python 3.4.2版中运行它。
import csv
import sqlite3
def createTable(cursor, rows, tablename):
tableCreated = False
for row in rows:
if not tableCreated:
sql = "CREATE TABLE %s(ROW INTEGER PRIMARY KEY, " + ", ".join(["c%d" % (i+1) for i in range(len(row))]) + ")"
cur.execute(sql % tablename)
tableCreated = True
sql = "INSERT INTO %s VALUES(NULL, " + ", ".join(["'" + c + "'" for c in row]) + ")"
cur.execute(sql % tablename)
conn.commit()
conn = sqlite3.connect(":memory:")
cur = conn.cursor()
for filename, tablename in [("in1.csv", "CSV1"), ("out1.csv", "CSV2")]:
with open(filename, "r") as f:
reader = csv.reader(f, delimiter=',')
rows = [row for row in reader]
createTable(cur, rows, tablename)
sql = """WITH
MATCHES AS(SELECT CSV2.*
, CSV1.ROW as ROW_1
, CSV1.C4 as C4_1
, CSV1.C5 as C5_1
FROM CSV2
LEFT JOIN CSV1
ON CSV1.C4 LIKE '%' || CSV2.C2 || '%'
),
EXACT AS(CSV1.C4 = CSV1.C5
SELECT *
FROM MATCHES
WHERE C4_1 = C5_1
),
MIN_ROW AS(SELECT C1
, min(ROW_1) as ROW_1
FROM MATCHES
WHERE C1 NOT IN (SELECT C1 FROM EXACT)
GROUP BY C1, C2, C3, C4, C5
)
SELECT *
FROM EXACT
UNION
SELECT MATCHES.*
FROM MIN_ROW
INNER JOIN MATCHES
ON MIN_ROW.C1 = MATCHES.C1
AND (MIN_ROW.ROW_1 = MATCHES.ROW_1 OR MIN_ROW.ROW_1 IS NULL)
ORDER BY C1"""
for row in cur.execute(sql):
print (row)
运行这个脚本给了我
Traceback (most recent call last):
File "script.py", line 55, in <module>
for row in cur.execute(sql):
sqlite3.OperationalError: near "CSV1": syntax error
我已经在这个脚本上工作了一段时间了,我完全迷路了。如果有人可以通过工作脚本使我摆脱困境,我将非常感谢。请找到以下示例CSV文件。
in1.csv
Homo sapiens,Vertebrate Taxonomy Ontology,direct,Homo sapiens,Homo sapiens,Vertebrate Taxonomy Ontology
Homo sapiens,Systematized Nomenclature of Medicine - Clinical Terms,direct,Homo sapiens,Homo sapiens,Systematized Nomenclature of Medicine - Clinical Terms
Homo,Vertebrate Taxonomy Ontology,direct,Homo sapiens,Homo,Vertebrate Taxonomy Ontology
out1.csv
!Sample_title, !Sample_geo_accession, !Sample_status, !Sample_type, !Sample_source_name_ch1, !Sample_organism_ch1, !Sample_characteristics_ch1, !Sample_characteristics_ch1, !Sample_characteristics_ch1, !Sample_characteristics_ch1, !Sample_characteristics_ch1, !Sample_characteristics_ch1, !Sample_molecule_ch1, !Sample_extract_protocol_ch1, !Sample_label_ch1, !Sample_label_protocol_ch1, !Sample_hyb_protocol, !Sample_scan_protocol, !Sample_description, !Sample_data_processing, !Sample_platform_id
PBMC_S.aureus_MSSA_INF005, GSM173178, Public on march 16 2007, ribonucleic acid, PBMC_S. aureus, Homo sapiens, Age: 10 years- when sample taken, Gender: male, Race: Hispanic, Illness: Osteomyelitis, Treatment: Cefazolin, Pathogen: S. aureus- MSSA, total ribonucleic acid, RNeasy mini, biotin, Biotinylated complementary rna were prepared according to the standard Affymetrix protocol., Standard Affymetrix protocol., GeneChips were scanned using the Agilent GeneArray 2500 Scanner., The subject was infected with S. aureus- MSSA., The data were analyzed with Microarray Suite version 5.0 (meconium aspiration syndrome 5.0) using Affymetrix default analysis settings and global scaling as normalization method. The trimmed mean target intensity of each array was arbitrarily set to 500., GPL96
PBMC_S.pneumoniae_INF009, GSM173179, Public on march 16 2007, ribonucleic acid, PBMC_S. pneumoniae, Homo sapiens, Age:4 months- when sample taken, Gender: male, Race: Caucasian, Illness: Abscess, Treatment: Cefazolin, Pathogen: S. pneumoniae, total ribonucleic acid, RNeasy mini, biotin, Biotinylated complementary rna were prepared according to the standard Affymetrix protocol., Standard Affymetrix protocol., GeneChips were scanned using the Agilent GeneArray 2500 Scanner., The subject was infected with S. pneumoniae., The data were analyzed with Microarray Suite version 5.0 (meconium aspiration syndrome 5.0) using Affymetrix default analysis settings and global scaling as normalization method. The trimmed mean target intensity of each array was arbitrarily set to 500., GPL96
最佳答案
数据库在CSV1.C4 = CSV1.C5
的定义中抱怨多余的EXACT
。