问题描述
你好,
下面的程序需要很大的HTML(4GB),我的笔记本电脑有16GB的内存.
The below program takes a very big HTML (4GB), my laptop has 16GB of memory.
当我运行程序时,它可以完成工作,但是逐渐占用了越来越多的内存(查看TaskManager).
When I run the program, it does the job, but it gradually takes more and more memory (looking at the TaskManager).
为什么会这样,因为文件只有4GB?
Why would that be since the file is only 4GB in size ?
请问我在做什么错?
您会做些什么?
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.IO;
using System.Data.SqlClient;
namespace HTML2CSV
{
public partial class Form1 : Form
{
public Form1()
{
InitializeComponent();
}
private void button1_Click(object sender, EventArgs e)
{
// Open File =====================================================
string userFolder = Environment.GetFolderPath(Environment.SpecialFolder.UserProfile);
string userPath = Environment.GetFolderPath(Environment.SpecialFolder.UserProfile) + "\\Desktop";
OpenFileDialog ofd = new OpenFileDialog();
ofd.InitialDirectory = userPath;
ofd.Filter = "html files (*.html)|*.txt|All files (*.*)|*.*";
ofd.FilterIndex = 2;
ofd.RestoreDirectory = true;
ofd.ShowDialog();
string fName = ofd.FileName;
label1.Text = fName;
label1.Refresh();
string theHTMLFile;
theHTMLFile = fName;
int collectedCount = 0;
string theQuery;
string con_string = "Server=127.0.0.1;Database=SHARES;Integrated Security=true;";
// Open File ==========================================================
// Get numbers ========================================================
var numbers = new List<int>();
var lines = new List<string>();
var lineCount = 0;
string num;
int counter = 0;
int numbersCount = 0;
int onePercent;
using (var reader = File.OpenText(fName))
{
while ((num = reader.ReadLine()) != null)
{
counter++;
numbersCount++;
if (num.IndexOf("</tr>") != -1)
{
//MessageBox.Show("counter: " + counter.ToString() + "num: " + num);
numbers.Add(counter);
}
}
}
// Get numbers ===============================================================
int rowNumber = 1;
int progressBarPercentage;
for (int x = 1; x < numbers.Count(); x++) // Take another line number 352, 362, 372 etc.
{
rowNumber = numbers[x];
label2.Text = x.ToString();
label2.Refresh();
onePercent = ((numbers.Count() / 100));
// =================================================================================
string lineOfHTML = "";
var srhtml = new StreamReader(theHTMLFile);
for (int y = 1; y <= rowNumber; y++) // read lines one by one and process eight rows when its found at line 352 - 9
{
lineOfHTML = srhtml.ReadLine();
lineCount++;
if (lineCount == (rowNumber - 9))
{
while (collectedCount <= 7)
{
lines.Add(srhtml.ReadLine().Replace("<td valign=\"top\" nowrap=\"nowrap\">", "").Replace("</td>", "").Replace(",", " & "));
lineCount++;
collectedCount++;
}
collectedCount = 0;
theQuery = "INSERT INTO [SHARES].[dbo].[share" + comboBox1.SelectedItem + "] (LineNumber ,[Path] ,[Account] ,[Type] ,Directory_Owner ,Permission_Simple ,Apply_To ,Inherited ,Permissions_Advanced) VALUES (" + rowNumber.ToString() + ", '" +
lines[0].Replace("'", "") + "', '" + lines[1].Replace("'", "") + "', '" + lines[2].Replace("'", "") + "', '" + lines[3].Replace("'", "") + "', '" +
lines[4].Replace("'", "") + "', '" + lines[5].Replace("'", "") + "', '" + lines[6].Replace("'", "") + "', '" + lines[7].Replace("'", "") + "')";
SqlConnection conn = new SqlConnection(con_string);
conn.Open();
SqlCommand cmd = new SqlCommand(theQuery, conn);
cmd.ExecuteNonQuery();
conn.Close();
lineCount = 0;
lines.Clear();
break;
}
}
// =================================================================================
// ProgressBar ProgressBar ProgressBar ProgressBar ProgressBar ProgressBar ProgressBar
if (x >= onePercent)
{
//progressBarPercentage = (x / numbersCount) * 100.0;
progressBarPercentage = (100 * x) / numbers.Count;
label3.Visible = true;
label3.Text = progressBarPercentage.ToString() + "%";
label3.Refresh();
//progressBar1.Value = progressBarPercentage;
progressBar1.Value = (100 * x) / numbers.Count;
progressBar1.Refresh();
}
// ProgressBar ProgressBar ProgressBar ProgressBar ProgressBar ProgressBar ProgressBar
}
progressBarPercentage = 100;
label3.Text = progressBarPercentage.ToString() + "%";
label3.Refresh();
//progressBar1.Value = progressBarPercentage;
progressBar1.Value = progressBarPercentage;
progressBar1.Refresh();
MessageBox.Show("Done !");
}
private void Form1_Load(object sender, EventArgs e)
{
label3.Visible = false;
string[] s = {"E","F","G","H","I"};
comboBox1.DataSource = s;
} //button
}
}
推荐答案
顺便说一句,可以重新组织一个集合( List< string> 或 Queue< string> ),它将保留从文件中获取的最后要求的(十)行.然后在找到</tr>"时进行处理,并删除不需要的行,然后再获取下一行.
By the way, instead of re-reading the unneeded lines from the beginning, it is possible to organise a collection (List<string> orQueue<string>) which will keep the last required (ten) lines got from file. Then process it when you find "</tr>", and remove unneeded lines before getting the next ones.
也许这还意味着您的内存得到了有效利用,即您没有为千兆字节付出白费.
这篇关于C#程序占用了所有内存的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持!