需求
把不同客户提供Excel 直接导入到系统中生成对应的收货单或是出货单。后端创建收货端和出货单的接口已经有现成的webservice或是标准的xml;这类需要做的就是把客户提供不同种类的Excel mapping成标准格式。
要重点解决问题
不同格式的excel如何找到对应的数据项,比如一个Excel中需要字段分别在不同的sheet或是不同的位置上。
解决方案
第一个定位配置信息,sheet-name:数据从哪个sheet中读取,默认sheet1,start-tag:固定标识,查找Excel中一些特殊文本信息来定位具体的celladdress(行,列),data-offset:设置一个偏移量,在找到具体内容的地址后可能真正需要数据在后面,那就需要设置一个偏移量待读取信息, end-tag:结束位置,用于循环读取的范围。
第二个映射字段明,XmlNode Name:就是目标字段明,data-field:Excel中对应的字段名称(含有表头的行),data-type:目标字段的类型,data-formatter:格式化截取excel Cell中的内容 比如:需要通过substring,或splitl来取其中的内容。
第三个表示单个表头,还是循环的表体 replicate="true" 表示需要循环读取。
大致的处理过程
先根据配置规则把Excel中需要的信息提取出来并生成一个XML文档,如果标准的接口XML结构和数据都比较复杂,那么还需要使用XSLT语言来做更复杂的mapping,满足后端服务的要求。
实际运行的过程
原始Excel
配置规则XML
转换成初步XML
实现的代码
代码非常的简单,还是第一个版本,以后再慢慢优化和重构
1 class Program 2 { 3 static async Task Main(string[] args) 4 { 5 var path = @"d:\9C箱单0000880680.xlsx"; 6 var configpath = @"d:\XslImportRule1.xml"; 7 var xdoc = XDocument.Load(configpath); 8 var root = xdoc.Root.Name; 9 var descxml = new XDocument(); 10 descxml.Add(new XElement(xdoc.Root.Name)); 11 var workbook = new XSSFWorkbook(path); 12 Process(workbook,null, xdoc.Root, 0, descxml.Root,null); 13 descxml.Save("d:\\output.xml"); 14 return; 15 } 16 static void Process(IWorkbook book, ISheet sheet, XElement element, int depth, XElement root,DataRow dr) 17 { 18 var pelment = element.Parent; 19 var name = element.Name; 20 var atts = element.Attributes(); 21 var replicate = atts.Where(x => x.Name == "replicate").FirstOrDefault()?.Value; 22 var sheetname = atts.Where(x => x.Name == "sheet-name").FirstOrDefault()?.Value; 23 var starttag = atts.Where(x => x.Name == "start-tag").FirstOrDefault()?.Value; 24 var start = atts.Where(x => x.Name == "start").FirstOrDefault()?.Value; 25 var endtag = atts.Where(x => x.Name == "end-tag").FirstOrDefault()?.Value; 26 var end = atts.Where(x => x.Name == "end").FirstOrDefault()?.Value; 27 var fieldname = atts.Where(x => x.Name == "data-field").Select(x => x.Value).FirstOrDefault(); 28 var datatype = atts.Where(x => x.Name == "data-type").Select(x => x.Value); 29 var defaultvalue = atts.Where(x => x.Name == "data-default").FirstOrDefault()?.Value; 30 var formatter = atts.Where(x => x.Name == "data-formatter").FirstOrDefault()?.Value; 31 var offset = atts.Where(x => x.Name == "data-offset").FirstOrDefault()?.Value; 32 XElement copyelement = null; 33 34 //if (element.Parent != null ) 35 //{ 36 // copyelement = new XElement(name); 37 // root.Add(copyelement); 38 //} 39 if (!string.IsNullOrEmpty(replicate) && !string.IsNullOrEmpty(sheetname)) { 40 sheet = book.GetSheet(sheetname); 41 } 42 43 if (!element.HasElements) 44 { 45 copyelement = new XElement(name); 46 root.Add(copyelement); 47 // element is child with no descendants 48 if (dr == null) 49 { 50 CellAddress celladdress = null; 51 if (!string.IsNullOrEmpty(starttag)) 52 { 53 celladdress = findXslx(sheet, starttag); 54 } 55 else if (!string.IsNullOrEmpty(start)) 56 { 57 celladdress = new CellAddress(new CellReference(start)); 58 } 59 if (celladdress != null) 60 { 61 var r = 0; 62 var c = 0; 63 if (!string.IsNullOrEmpty(offset)) 64 { 65 var sp = offset.Split(';'); 66 foreach (var ts in sp) 67 { 68 var sparray = ts.Split(':'); 69 if (sparray[0].Equals("c", StringComparison.OrdinalIgnoreCase)) 70 { 71 c = Convert.ToInt32(sparray[1]); 72 } 73 else 74 { 75 r = Convert.ToInt32(sparray[1]); 76 } 77 } 78 } 79 var cell = sheet.GetRow(celladdress.Row + r).GetCell(celladdress.Column + c); 80 var val = getCellValue(cell); 81 if (string.IsNullOrEmpty(val) && !string.IsNullOrEmpty(defaultvalue)) 82 { 83 val = defaultvalue; 84 } 85 if (!string.IsNullOrEmpty(val) && !string.IsNullOrEmpty(formatter)) 86 { 87 var codescript = formatter.Replace("$", "\"" + val + "\""); 88 var fval = CSharpScript.EvaluateAsync<string>(codescript).Result; 89 val = fval; 90 } 91 copyelement.SetValue(val); 92 } 93 else if (!string.IsNullOrEmpty(defaultvalue)) 94 { 95 copyelement.SetValue(defaultvalue); 96 } 97 } 98 else 99 { 100 if(dr.Table.Columns.Contains(fieldname)) 101 { 102 var val = dr[fieldname].ToString(); 103 if (string.IsNullOrEmpty(val) && !string.IsNullOrEmpty(defaultvalue)) 104 { 105 val = defaultvalue; 106 107 } 108 copyelement.SetValue(val); 109 } 110 else if(!string.IsNullOrEmpty(defaultvalue)) 111 { 112 copyelement.SetValue(defaultvalue); 113 } 114 } 115 116 } 117 else 118 { 119 depth++; 120 if (replicate == "true") 121 { 122 var datatable= filldatatable(sheet, starttag, start, endtag, end, offset); 123 if (datatable.Rows.Count > 0) 124 { 125 foreach (DataRow datarow in datatable.Rows) 126 { 127 copyelement = new XElement(name); 128 foreach (var child in element.Elements()) 129 { 130 if (copyelement != null) 131 { 132 Process(book, sheet, child, depth, copyelement, datarow); 133 } 134 else 135 { 136 Process(book, sheet, child, depth, root, datarow); 137 } 138 139 } 140 root.Add(copyelement); 141 } 142 } 143 } 144 else 145 { 146 if (element.Parent != null) 147 { 148 copyelement = new XElement(name); 149 root.Add(copyelement); 150 } 151 foreach (var child in element.Elements()) 152 { 153 if (copyelement != null) 154 { 155 Process(book,sheet, child, depth, copyelement,null); 156 } 157 else 158 { 159 Process(book,sheet, child, depth, root,null); 160 } 161 162 } 163 } 164 165 depth--; 166 } 167 } 168 169 private static DataTable filldatatable(ISheet sheet, string starttag, string start, string endtag, string end, string offset) 170 { 171 CellAddress startaddress = null; 172 CellAddress endaddress = null; 173 if (!string.IsNullOrEmpty(starttag)) 174 { 175 startaddress = findXslx(sheet, starttag); 176 } 177 else if (!string.IsNullOrEmpty(start)) 178 { 179 startaddress = new CellAddress(new CellReference(start)); 180 } 181 else 182 { 183 startaddress = new CellAddress(new CellReference("A0")); 184 } 185 if (!string.IsNullOrEmpty(endtag)) 186 { 187 endaddress = findXslx(sheet, endtag); 188 } 189 else if (!string.IsNullOrEmpty(end)) 190 { 191 endaddress = new CellAddress(new CellReference(end)); 192 } 193 else 194 { 195 endaddress = null; 196 } 197 var offsetr = 0; 198 var offsetc = 0; 199 if (!string.IsNullOrEmpty(offset)) 200 { 201 var sp = offset.Split(';'); 202 foreach (var ts in sp) 203 { 204 var sparray = ts.Split(':'); 205 if (sparray[0].Equals("c", StringComparison.OrdinalIgnoreCase)) 206 { 207 offsetc = Convert.ToInt32(sparray[1]); 208 } 209 else 210 { 211 offsetr = Convert.ToInt32(sparray[1]); 212 } 213 } 214 } 215 var firstrow = startaddress == null ? sheet.FirstRowNum : startaddress.Row + offsetr; 216 var lastrow = (endaddress == null) ? sheet.LastRowNum : endaddress.Row; 217 var table = new DataTable(); 218 var lastcell = 0; //row.LastCellNum; 219 var firstcell = 0; //row.FirstCellNum + offsetc; 220 for (int r = firstrow; r < lastrow; r++) 221 { 222 var row = sheet.GetRow(r); 223 if (row == null) continue; 224 225 if (r == firstrow) 226 { 227 lastcell = row.LastCellNum; 228 firstcell = row.FirstCellNum + offsetc; 229 for (int c = firstcell; c < lastcell; c++) 230 { 231 var cell = row.GetCell(c); 232 if (cell == null) continue; 233 var strval = getCellValue(cell).Trim(); 234 if (!string.IsNullOrEmpty(strval)) 235 { 236 table.Columns.Add(new DataColumn(strval)); 237 } 238 } 239 } 240 else 241 { 242 var dataRow = table.NewRow(); 243 var array = new string[table.Columns.Count]; 244 //for (var c = 0; c < table.Columns.Count; c++) 245 //{ 246 // var cell = row.GetCell(firstcell+c); 247 // var val = getCellValue(cell).Trim(); 248 // array[c] = val; 249 //} 250 for (int c = firstcell; c < lastcell; c++) 251 { 252 var cell = row.GetCell(c); 253 var val = getCellValue(cell).Trim(); 254 array[c- firstcell] = val; 255 } 256 dataRow.ItemArray = array; 257 table.Rows.Add(dataRow); 258 } 259 } 260 return table; 261 } 262 263 private static CellAddress findXslx(ISheet sheet, string key) 264 { 265 var lastrow = sheet.LastRowNum; 266 var firstrow = sheet.FirstRowNum; 267 for (int r = firstrow; r < lastrow; r++) 268 { 269 var row = sheet.GetRow(r); 270 if (row == null) continue; 271 var lastcell = row.LastCellNum; 272 var firstcell = row.FirstCellNum; 273 for (int c = firstcell; c < lastcell; c++) 274 { 275 var cell = row.GetCell(c); 276 if (cell == null) continue; 277 var strval = getCellValue(cell).Trim(); 278 //if (strval.Trim().Equals(key, StringComparison.OrdinalIgnoreCase)) 279 //{ 280 // return cell.Address; 281 //} 282 if (match(key, strval)) 283 { 284 return cell.Address; 285 } 286 } 287 } 288 return null; 289 } 290 private static string getCellValue(ICell cell) 291 { 292 if (cell == null) 293 { 294 return string.Empty; 295 } 296 var dataFormatter = new DataFormatter(CultureInfo.CurrentCulture); 297 298 // If this is not part of a merge cell, 299 // just get this cell's value like normal. 300 if (!cell.IsMergedCell) 301 { 302 return dataFormatter.FormatCellValue(cell); 303 } 304 305 // Otherwise, we need to find the value of this merged cell. 306 else 307 { 308 // Get current sheet. 309 var currentSheet = cell.Sheet; 310 311 // Loop through all merge regions in this sheet. 312 for (int i = 0; i < currentSheet.NumMergedRegions; i++) 313 { 314 var mergeRegion = currentSheet.GetMergedRegion(i); 315 316 // If this merged region contains this cell. 317 if (mergeRegion.FirstRow <= cell.RowIndex && cell.RowIndex <= mergeRegion.LastRow && 318 mergeRegion.FirstColumn <= cell.ColumnIndex && cell.ColumnIndex <= mergeRegion.LastColumn) 319 { 320 // Find the top-most and left-most cell in this region. 321 var firstRegionCell = currentSheet.GetRow(mergeRegion.FirstRow) 322 .GetCell(mergeRegion.FirstColumn); 323 324 // And return its value. 325 return dataFormatter.FormatCellValue(firstRegionCell); 326 } 327 } 328 // This should never happen. 329 throw new Exception("Cannot find this cell in any merged region"); 330 } 331 } 332 333 static bool match(string pattern, string input) 334 { 335 if (String.Compare(pattern, input) == 0) 336 { 337 return true; 338 } 339 else if (String.IsNullOrEmpty(input)) 340 { 341 if (String.IsNullOrEmpty(pattern.Trim(new Char[1] { '*' }))) 342 { 343 return true; 344 } 345 else 346 { 347 return false; 348 } 349 } 350 else if (pattern.Length == 0) 351 { 352 return false; 353 } 354 else if (pattern[0] == '?') 355 { 356 return match(pattern.Substring(1), input.Substring(1)); 357 } 358 else if (pattern[pattern.Length - 1] == '?') 359 { 360 return match(pattern.Substring(0, pattern.Length - 1), 361 input.Substring(0, input.Length - 1)); 362 } 363 else if (pattern[0] == '*') 364 { 365 if (match(pattern.Substring(1), input)) 366 { 367 return true; 368 } 369 else 370 { 371 return match(pattern, input.Substring(1)); 372 } 373 } 374 else if (pattern[pattern.Length - 1] == '*') 375 { 376 if (match(pattern.Substring(0, pattern.Length - 1), input)) 377 { 378 return true; 379 } 380 else 381 { 382 return match(pattern, input.Substring(0, input.Length - 1)); 383 } 384 } 385 else if (pattern[0] == input[0]) 386 { 387 return match(pattern.Substring(1), input.Substring(1)); 388 } 389 return false; 390 } 391 } 392 }
代码库
https://github.com/neozhu/excelcompleximport
最近还会继续更新