summaryrefslogtreecommitdiff
path: root/src/lua51/lstring.c
blob: 73d34c47707187cd892225314be110bce43a6b26 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
/*
** $Id: lstring.c,v 2.8.1.1 2007/12/27 13:02:25 roberto Exp $
** String table (keeps all strings handled by Lua)
** See Copyright Notice in lua.h
*/


#include <string.h>

#define lstring_c
#define LUA_CORE

#include "lua.h"

#include "lmem.h"
#include "lobject.h"
#include "lstate.h"
#include "lstring.h"



//c 重新分配global_State->strt每个桶的数据量
//c 当桶的大小远超字符串容量,重新分配大小,删掉一些空桶(空位)
void luaS_resize (lua_State *L, int newsize) {
  GCObject **newhash;
  stringtable *tb;
  int i;
  if (G(L)->gcstate == GCSsweepstring)//如果GC在回收字符串阶段,不要rehash
    return;  /* cannot resize during GC traverse */
  newhash = luaM_newvector(L, newsize, GCObject *); //建立一个新的散列桶,并清空
  tb = &G(L)->strt;//旧的散列桶
  for (i=0; i<newsize; i++) newhash[i] = NULL;
  //遍历就的散列桶,并填入新的散列桶
	/* rehash */
  for (i=0; i<tb->size; i++) {
    GCObject *p = tb->hash[i];
    while (p) {  /* for each node in the list */
      GCObject *next = p->gch.next;  /* save next */
      unsigned int h = gco2ts(p)->hash;
			//新的散列值
      int h1 = lmod(h, newsize);  /* new position */
      lua_assert(cast_int(h%newsize) == lmod(h, newsize));
      p->gch.next = newhash[h1];//c 接在同一个hash的最前面  /* chain it */
      newhash[h1] = p; //c 将p作为桶内第一个存起来
      p = next;
    }
  }
  luaM_freearray(L, tb->hash, tb->size, TString *);
  tb->size = newsize;
  tb->hash = newhash;
}


//c 新建字符串
static TString *newlstr (lua_State *L, const char *str, size_t l,
                                       unsigned int h) {
  TString *ts;
  stringtable *tb;
  if (l+1 > (MAX_SIZET - sizeof(TString))/sizeof(char))
    luaM_toobig(L);
  ts = cast(TString *, luaM_malloc(L, (l+1)*sizeof(char)+sizeof(TString)));
  ts->tsv.len = l;
  ts->tsv.hash = h;
  ts->tsv.marked = luaC_white(G(L));
  ts->tsv.tt = LUA_TSTRING;
  ts->tsv.reserved = 0;
  memcpy(ts+1, str, l*sizeof(char));
  ((char *)(ts+1))[l] = '\0';  /* ending 0 */
  tb = &G(L)->strt;
	// 计算hash值
  h = lmod(h, tb->size);
  ts->tsv.next = tb->hash[h];  /* chain new entry */
  tb->hash[h] = obj2gco(ts);
  tb->nuse++;
	//c 给字符串通扩容,如果字符串数量大于桶容量
	//c 给桶扩容为2倍
  if (tb->nuse > cast(lu_int32, tb->size) && tb->size <= MAX_INT/2)
    luaS_resize(L, tb->size*2);  /* too crowded */
  return ts;
}


//c 创建字符串
TString *luaS_newlstr (lua_State *L, const char *str, size_t l) {
  GCObject *o;
  unsigned int h = cast(unsigned int, l);  /* seed */
  size_t step = (l>>5)+1;  /* if string is too long, don't hash all its chars */
  size_t l1;
	//c 如果字符串非常长,不要逐位计算散列值,每step步取一个字符计算即可
  for (l1=l; l1>=step; l1-=step)  /* compute hash */
    h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1]));
  for (o = G(L)->strt.hash[lmod(h, G(L)->strt.size)];
       o != NULL;
       o = o->gch.next) {
    TString *ts = rawgco2ts(o);
    if (ts->tsv.len == l && (memcmp(str, getstr(ts), l) == 0)) {//c 如果散列值相同,用memcmp快速对比
			//c 如果字符串被标记了回收(gch.marked),重新标记它不要回收
      /* string may be dead */
      if (isdead(G(L), o)) changewhite(o);
      return ts;
    }
  }
	//c 如果global->strt里没有这个字符串,新建立一个,并加到哈希桶里 
  return newlstr(L, str, l, h);  /* not found */
}

//c 创建userdata
//c userdata的gc和普通对象(udata和uv除外的)是分开的
Udata *luaS_newudata (lua_State *L, size_t s, Table *e) {
  Udata *u;
  if (s > MAX_SIZET - sizeof(Udata))
    luaM_toobig(L);

	// 创建并赋值
  u = cast(Udata *, luaM_malloc(L, s + sizeof(Udata)));
	//c udata和普通对象在GC上的区别在于不调用luaC_link,因为不会加在
	//c G(L)->rootgc链上,而是加在G(L)->mainthread后面
	//c udata标记为 white
  u->uv.marked = luaC_white(G(L));  /* is not finalized */
  u->uv.tt = LUA_TUSERDATA;
  u->uv.len = s;
  u->uv.metatable = NULL;
  u->uv.env = e;

	// 可见所有userdata都跟在mainthread之后第一个
	// 这样是为了方便,是一个hack
	// 由于userdata可能会定义了__gc,所以统一处理
  /* chain it on udata list (after main thread) */
  u->uv.next = G(L)->mainthread->next;
  G(L)->mainthread->next = obj2gco(u);   

	return u;
}